summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-03-21 09:48:04 -0400
committerRobert Haas <rhaas@postgresql.org>2017-03-21 09:48:04 -0400
commitd3cc37f1d801a6b5cad9bf179274a8d767f1ee50 (patch)
tree88dd5a0133f46b2eb015b4fd2ce69f1b3c30bb56
parentd5286aa905c9173b3fc4f911682089149bd3caef (diff)
downloadpostgresql-d3cc37f1d801a6b5cad9bf179274a8d767f1ee50.tar.gz
Don't scan partitioned tables.
Partitioned tables do not contain any data; only their unpartitioned descendents need to be scanned. However, the partitioned tables still need to be locked, even though they're not scanned. To make that work, Append and MergeAppend relations now need to carry a list of (unscanned) partitioned relations that must be locked, and InitPlan must lock all partitioned result relations. Aside from the obvious advantage of avoiding some work at execution time, this has two other advantages. First, it may improve the planner's decision-making in some cases since the empty relation might throw things off. Second, it paves the way to getting rid of the storage for partitioned tables altogether. Amit Langote, reviewed by me. Discussion: http://postgr.es/m/6837c359-45c4-8044-34d1-736756335a15@lab.ntt.co.jp
-rw-r--r--src/backend/executor/execMain.c22
-rw-r--r--src/backend/executor/execParallel.c1
-rw-r--r--src/backend/executor/execUtils.c56
-rw-r--r--src/backend/executor/nodeAppend.c6
-rw-r--r--src/backend/executor/nodeMergeAppend.c6
-rw-r--r--src/backend/executor/nodeModifyTable.c19
-rw-r--r--src/backend/nodes/copyfuncs.c21
-rw-r--r--src/backend/nodes/equalfuncs.c12
-rw-r--r--src/backend/nodes/outfuncs.c21
-rw-r--r--src/backend/nodes/readfuncs.c4
-rw-r--r--src/backend/optimizer/path/allpaths.c43
-rw-r--r--src/backend/optimizer/path/joinrels.c2
-rw-r--r--src/backend/optimizer/plan/createplan.c14
-rw-r--r--src/backend/optimizer/plan/planner.c89
-rw-r--r--src/backend/optimizer/plan/setrefs.c21
-rw-r--r--src/backend/optimizer/prep/prepunion.c103
-rw-r--r--src/backend/optimizer/util/pathnode.c10
-rw-r--r--src/backend/utils/cache/plancache.c3
-rw-r--r--src/include/executor/executor.h1
-rw-r--r--src/include/nodes/nodes.h1
-rw-r--r--src/include/nodes/plannodes.h20
-rw-r--r--src/include/nodes/relation.h37
-rw-r--r--src/include/optimizer/pathnode.h8
-rw-r--r--src/include/optimizer/planner.h2
-rw-r--r--src/test/regress/expected/inherit.out147
-rw-r--r--src/test/regress/expected/tablesample.out4
-rw-r--r--src/test/regress/sql/inherit.sql28
27 files changed, 537 insertions, 164 deletions
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f5cd65d8a0..023ea0081a 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -844,6 +844,22 @@ InitPlan(QueryDesc *queryDesc, int eflags)
estate->es_num_result_relations = numResultRelations;
/* es_result_relation_info is NULL except when within ModifyTable */
estate->es_result_relation_info = NULL;
+
+ /*
+ * In the partitioned result relation case, lock the non-leaf result
+ * relations too. We don't however need ResultRelInfos for them.
+ */
+ if (plannedstmt->nonleafResultRelations)
+ {
+ foreach(l, plannedstmt->nonleafResultRelations)
+ {
+ Index resultRelationIndex = lfirst_int(l);
+ Oid resultRelationOid;
+
+ resultRelationOid = getrelid(resultRelationIndex, rangeTable);
+ LockRelationOid(resultRelationOid, RowExclusiveLock);
+ }
+ }
}
else
{
@@ -858,7 +874,11 @@ InitPlan(QueryDesc *queryDesc, int eflags)
/*
* Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
* before we initialize the plan tree, else we'd be risking lock upgrades.
- * While we are at it, build the ExecRowMark list.
+ * While we are at it, build the ExecRowMark list. Any partitioned child
+ * tables are ignored here (because isParent=true) and will be locked by
+ * the first Append or MergeAppend node that references them. (Note that
+ * the RowMarks corresponding to partitioned child tables are present in
+ * the same list as the rest, i.e., plannedstmt->rowMarks.)
*/
estate->es_rowMarks = NIL;
foreach(l, plannedstmt->rowMarks)
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index a1289e5f12..86db73be43 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -161,6 +161,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
pstmt->planTree = plan;
pstmt->rtable = estate->es_range_table;
pstmt->resultRelations = NIL;
+ pstmt->nonleafResultRelations = NIL;
pstmt->subplans = estate->es_plannedstmt->subplans;
pstmt->rewindPlanIDs = NULL;
pstmt->rowMarks = NIL;
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 3d6a3801c0..a72cffeb6e 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -43,6 +43,7 @@
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
+#include "storage/lmgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -953,3 +954,58 @@ ShutdownExprContext(ExprContext *econtext, bool isCommit)
MemoryContextSwitchTo(oldcontext);
}
+
+/*
+ * ExecLockNonLeafAppendTables
+ *
+ * Locks, if necessary, the tables indicated by the RT indexes contained in
+ * the partitioned_rels list. These are the non-leaf tables in the partition
+ * tree controlled by a given Append or MergeAppend node.
+ */
+void
+ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
+{
+ PlannedStmt *stmt = estate->es_plannedstmt;
+ ListCell *lc;
+
+ foreach(lc, partitioned_rels)
+ {
+ ListCell *l;
+ Index rti = lfirst_int(lc);
+ bool is_result_rel = false;
+ Oid relid = getrelid(rti, estate->es_range_table);
+
+ /* If this is a result relation, already locked in InitPlan */
+ foreach(l, stmt->nonleafResultRelations)
+ {
+ if (rti == lfirst_int(l))
+ {
+ is_result_rel = true;
+ break;
+ }
+ }
+
+ /*
+ * Not a result relation; check if there is a RowMark that requires
+ * taking a RowShareLock on this rel.
+ */
+ if (!is_result_rel)
+ {
+ PlanRowMark *rc = NULL;
+
+ foreach(l, stmt->rowMarks)
+ {
+ if (((PlanRowMark *) lfirst(l))->rti == rti)
+ {
+ rc = lfirst(l);
+ break;
+ }
+ }
+
+ if (rc && RowMarkRequiresRowShareLock(rc->markType))
+ LockRelationOid(relid, RowShareLock);
+ else
+ LockRelationOid(relid, AccessShareLock);
+ }
+ }
+}
diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c
index 6986caee6b..a107545b83 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -129,6 +129,12 @@ ExecInitAppend(Append *node, EState *estate, int eflags)
Assert(!(eflags & EXEC_FLAG_MARK));
/*
+ * Lock the non-leaf tables in the partition tree controlled by this
+ * node. It's a no-op for non-partitioned parent tables.
+ */
+ ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
+
+ /*
* Set up empty vector of subplan states
*/
nplans = list_length(node->appendplans);
diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c
index 7a20bf07a4..8a2e78266b 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -72,6 +72,12 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
+ * Lock the non-leaf tables in the partition tree controlled by this
+ * node. It's a no-op for non-partitioned parent tables.
+ */
+ ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
+
+ /*
* Set up empty vector of subplan states
*/
nplans = list_length(node->mergeplans);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 95e158970c..29c6a6e1d8 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -45,6 +45,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
+#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
@@ -1725,8 +1726,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_result_relation_info = saved_resultRelInfo;
+ /* The root table RT index is at the head of the partitioned_rels list */
+ if (node->partitioned_rels)
+ {
+ Index root_rti;
+ Oid root_oid;
+
+ root_rti = linitial_int(node->partitioned_rels);
+ root_oid = getrelid(root_rti, estate->es_range_table);
+ rel = heap_open(root_oid, NoLock); /* locked by InitPlan */
+ }
+ else
+ rel = mtstate->resultRelInfo->ri_RelationDesc;
+
/* Build state for INSERT tuple routing */
- rel = mtstate->resultRelInfo->ri_RelationDesc;
if (operation == CMD_INSERT &&
rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
@@ -1897,6 +1910,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->ps.ps_ExprContext = NULL;
}
+ /* Close the root partitioned rel if we opened it above. */
+ if (rel != mtstate->resultRelInfo->ri_RelationDesc)
+ heap_close(rel, NoLock);
+
/*
* If needed, Initialize target list, projection and qual for ON CONFLICT
* DO UPDATE.
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index c799e3112c..67c7de670b 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -90,6 +90,7 @@ _copyPlannedStmt(const PlannedStmt *from)
COPY_NODE_FIELD(planTree);
COPY_NODE_FIELD(rtable);
COPY_NODE_FIELD(resultRelations);
+ COPY_NODE_FIELD(nonleafResultRelations);
COPY_NODE_FIELD(subplans);
COPY_BITMAPSET_FIELD(rewindPlanIDs);
COPY_NODE_FIELD(rowMarks);
@@ -200,6 +201,7 @@ _copyModifyTable(const ModifyTable *from)
COPY_SCALAR_FIELD(operation);
COPY_SCALAR_FIELD(canSetTag);
COPY_SCALAR_FIELD(nominalRelation);
+ COPY_NODE_FIELD(partitioned_rels);
COPY_NODE_FIELD(resultRelations);
COPY_SCALAR_FIELD(resultRelIndex);
COPY_NODE_FIELD(plans);
@@ -235,6 +237,7 @@ _copyAppend(const Append *from)
/*
* copy remainder of node
*/
+ COPY_NODE_FIELD(partitioned_rels);
COPY_NODE_FIELD(appendplans);
return newnode;
@@ -256,6 +259,7 @@ _copyMergeAppend(const MergeAppend *from)
/*
* copy remainder of node
*/
+ COPY_NODE_FIELD(partitioned_rels);
COPY_NODE_FIELD(mergeplans);
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(sortColIdx, from->numCols * sizeof(AttrNumber));
@@ -2205,6 +2209,20 @@ _copyAppendRelInfo(const AppendRelInfo *from)
}
/*
+ * _copyPartitionedChildRelInfo
+ */
+static PartitionedChildRelInfo *
+_copyPartitionedChildRelInfo(const PartitionedChildRelInfo *from)
+{
+ PartitionedChildRelInfo *newnode = makeNode(PartitionedChildRelInfo);
+
+ COPY_SCALAR_FIELD(parent_relid);
+ COPY_NODE_FIELD(child_rels);
+
+ return newnode;
+}
+
+/*
* _copyPlaceHolderInfo
*/
static PlaceHolderInfo *
@@ -4894,6 +4912,9 @@ copyObject(const void *from)
case T_AppendRelInfo:
retval = _copyAppendRelInfo(from);
break;
+ case T_PartitionedChildRelInfo:
+ retval = _copyPartitionedChildRelInfo(from);
+ break;
case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index b230f65e03..4d8e7fe9b0 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -896,6 +896,15 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
}
static bool
+_equalPartitionedChildRelInfo(const PartitionedChildRelInfo *a, const PartitionedChildRelInfo *b)
+{
+ COMPARE_SCALAR_FIELD(parent_relid);
+ COMPARE_NODE_FIELD(child_rels);
+
+ return true;
+}
+
+static bool
_equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b)
{
COMPARE_SCALAR_FIELD(phid);
@@ -3104,6 +3113,9 @@ equal(const void *a, const void *b)
case T_AppendRelInfo:
retval = _equalAppendRelInfo(a, b);
break;
+ case T_PartitionedChildRelInfo:
+ retval = _equalPartitionedChildRelInfo(a, b);
+ break;
case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 7418fbeded..1b9005fa53 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -252,6 +252,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node)
WRITE_NODE_FIELD(planTree);
WRITE_NODE_FIELD(rtable);
WRITE_NODE_FIELD(resultRelations);
+ WRITE_NODE_FIELD(nonleafResultRelations);
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(rowMarks);
@@ -344,6 +345,7 @@ _outModifyTable(StringInfo str, const ModifyTable *node)
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_BOOL_FIELD(canSetTag);
WRITE_UINT_FIELD(nominalRelation);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(resultRelations);
WRITE_INT_FIELD(resultRelIndex);
WRITE_NODE_FIELD(plans);
@@ -368,6 +370,7 @@ _outAppend(StringInfo str, const Append *node)
_outPlanInfo(str, (const Plan *) node);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(appendplans);
}
@@ -380,6 +383,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node)
_outPlanInfo(str, (const Plan *) node);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(mergeplans);
WRITE_INT_FIELD(numCols);
@@ -1808,6 +1812,7 @@ _outAppendPath(StringInfo str, const AppendPath *node)
_outPathInfo(str, (const Path *) node);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(subpaths);
}
@@ -1818,6 +1823,7 @@ _outMergeAppendPath(StringInfo str, const MergeAppendPath *node)
_outPathInfo(str, (const Path *) node);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(subpaths);
WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
@@ -2023,6 +2029,7 @@ _outModifyTablePath(StringInfo str, const ModifyTablePath *node)
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_BOOL_FIELD(canSetTag);
WRITE_UINT_FIELD(nominalRelation);
+ WRITE_NODE_FIELD(partitioned_rels);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(subpaths);
WRITE_NODE_FIELD(subroots);
@@ -2099,6 +2106,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
WRITE_NODE_FIELD(finalrtable);
WRITE_NODE_FIELD(finalrowmarks);
WRITE_NODE_FIELD(resultRelations);
+ WRITE_NODE_FIELD(nonleafResultRelations);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
@@ -2137,6 +2145,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list);
+ WRITE_NODE_FIELD(pcinfo_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list);
@@ -2420,6 +2429,15 @@ _outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
}
static void
+_outPartitionedChildRelInfo(StringInfo str, const PartitionedChildRelInfo *node)
+{
+ WRITE_NODE_TYPE("PARTITIONEDCHILDRELINFO");
+
+ WRITE_UINT_FIELD(parent_relid);
+ WRITE_NODE_FIELD(child_rels);
+}
+
+static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
WRITE_NODE_TYPE("PLACEHOLDERINFO");
@@ -3906,6 +3924,9 @@ outNode(StringInfo str, const void *obj)
case T_AppendRelInfo:
_outAppendRelInfo(str, obj);
break;
+ case T_PartitionedChildRelInfo:
+ _outPartitionedChildRelInfo(str, obj);
+ break;
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index d3bbc02f24..474f221a75 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1444,6 +1444,7 @@ _readPlannedStmt(void)
READ_NODE_FIELD(planTree);
READ_NODE_FIELD(rtable);
READ_NODE_FIELD(resultRelations);
+ READ_NODE_FIELD(nonleafResultRelations);
READ_NODE_FIELD(subplans);
READ_BITMAPSET_FIELD(rewindPlanIDs);
READ_NODE_FIELD(rowMarks);
@@ -1535,6 +1536,7 @@ _readModifyTable(void)
READ_ENUM_FIELD(operation, CmdType);
READ_BOOL_FIELD(canSetTag);
READ_UINT_FIELD(nominalRelation);
+ READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(resultRelations);
READ_INT_FIELD(resultRelIndex);
READ_NODE_FIELD(plans);
@@ -1564,6 +1566,7 @@ _readAppend(void)
ReadCommonPlan(&local_node->plan);
+ READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(appendplans);
READ_DONE();
@@ -1579,6 +1582,7 @@ _readMergeAppend(void)
ReadCommonPlan(&local_node->plan);
+ READ_NODE_FIELD(partitioned_rels);
READ_NODE_FIELD(mergeplans);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(sortColIdx, local_node->numCols);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 43bfd23804..a1e1a87c29 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -95,7 +95,8 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
- List *all_child_pathkeys);
+ List *all_child_pathkeys,
+ List *partitioned_rels);
static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
RelOptInfo *rel,
Relids required_outer);
@@ -346,6 +347,14 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_size(root, rel, rte);
}
+ else if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ /*
+ * A partitioned table without leaf partitions is marked
+ * as a dummy rel.
+ */
+ set_dummy_rel_pathlist(rel);
+ }
else if (rte->tablesample != NULL)
{
/* Sampled relation */
@@ -1259,6 +1268,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *all_child_pathkeys = NIL;
List *all_child_outers = NIL;
ListCell *l;
+ List *partitioned_rels = NIL;
+ RangeTblEntry *rte;
+
+ rte = planner_rt_fetch(rel->relid, root);
+ if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ partitioned_rels = get_partitioned_child_rels(root, rel->relid);
+ /* The root partitioned table is included as a child rel */
+ Assert(list_length(partitioned_rels) >= 1);
+ }
/*
* For every non-dummy child, remember the cheapest path. Also, identify
@@ -1359,7 +1378,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* if we have zero or one live subpath due to constraint exclusion.)
*/
if (subpaths_valid)
- add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0));
+ add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0,
+ partitioned_rels));
/*
* Consider an append of partial unordered, unparameterized partial paths.
@@ -1386,7 +1406,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
/* Generate a partial append path. */
appendpath = create_append_path(rel, partial_subpaths, NULL,
- parallel_workers);
+ parallel_workers, partitioned_rels);
add_partial_path(rel, (Path *) appendpath);
}
@@ -1396,7 +1416,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
*/
if (subpaths_valid)
generate_mergeappend_paths(root, rel, live_childrels,
- all_child_pathkeys);
+ all_child_pathkeys,
+ partitioned_rels);
/*
* Build Append paths for each parameterization seen among the child rels.
@@ -1438,7 +1459,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (subpaths_valid)
add_path(rel, (Path *)
- create_append_path(rel, subpaths, required_outer, 0));
+ create_append_path(rel, subpaths, required_outer, 0,
+ partitioned_rels));
}
}
@@ -1468,7 +1490,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
static void
generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels,
- List *all_child_pathkeys)
+ List *all_child_pathkeys,
+ List *partitioned_rels)
{
ListCell *lcp;
@@ -1532,13 +1555,15 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
rel,
startup_subpaths,
pathkeys,
- NULL));
+ NULL,
+ partitioned_rels));
if (startup_neq_total)
add_path(rel, (Path *) create_merge_append_path(root,
rel,
total_subpaths,
pathkeys,
- NULL));
+ NULL,
+ partitioned_rels));
}
}
@@ -1671,7 +1696,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
rel->pathlist = NIL;
rel->partial_pathlist = NIL;
- add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0));
+ add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0, NIL));
/*
* We set the cheapest path immediately, to ensure that IS_DUMMY_REL()
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 0551668976..6a0c67b9ab 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1217,7 +1217,7 @@ mark_dummy_rel(RelOptInfo *rel)
rel->partial_pathlist = NIL;
/* Set up the dummy path */
- add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0));
+ add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0, NIL));
/* Set or update cheapest_total_path and related fields */
set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 89e1946fc2..c80c9992c9 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -199,7 +199,7 @@ static CteScan *make_ctescan(List *qptlist, List *qpqual,
Index scanrelid, int ctePlanId, int cteParam);
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
Index scanrelid, int wtParam);
-static Append *make_append(List *appendplans, List *tlist);
+static Append *make_append(List *appendplans, List *tlist, List *partitioned_rels);
static RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree,
Plan *righttree,
@@ -273,7 +273,7 @@ static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
static ProjectSet *make_project_set(List *tlist, Plan *subplan);
static ModifyTable *make_modifytable(PlannerInfo *root,
CmdType operation, bool canSetTag,
- Index nominalRelation,
+ Index nominalRelation, List *partitioned_rels,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
List *rowMarks, OnConflictExpr *onconflict, int epqParam);
@@ -1026,7 +1026,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
* parent-rel Vars it'll be asked to emit.
*/
- plan = make_append(subplans, tlist);
+ plan = make_append(subplans, tlist, best_path->partitioned_rels);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1134,6 +1134,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
subplans = lappend(subplans, subplan);
}
+ node->partitioned_rels = best_path->partitioned_rels;
node->mergeplans = subplans;
return (Plan *) node;
@@ -2314,6 +2315,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path)
best_path->operation,
best_path->canSetTag,
best_path->nominalRelation,
+ best_path->partitioned_rels,
best_path->resultRelations,
subplans,
best_path->withCheckOptionLists,
@@ -5161,7 +5163,7 @@ make_foreignscan(List *qptlist,
}
static Append *
-make_append(List *appendplans, List *tlist)
+make_append(List *appendplans, List *tlist, List *partitioned_rels)
{
Append *node = makeNode(Append);
Plan *plan = &node->plan;
@@ -5170,6 +5172,7 @@ make_append(List *appendplans, List *tlist)
plan->qual = NIL;
plan->lefttree = NULL;
plan->righttree = NULL;
+ node->partitioned_rels = partitioned_rels;
node->appendplans = appendplans;
return node;
@@ -6282,7 +6285,7 @@ make_project_set(List *tlist,
static ModifyTable *
make_modifytable(PlannerInfo *root,
CmdType operation, bool canSetTag,
- Index nominalRelation,
+ Index nominalRelation, List *partitioned_rels,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
List *rowMarks, OnConflictExpr *onconflict, int epqParam)
@@ -6308,6 +6311,7 @@ make_modifytable(PlannerInfo *root,
node->operation = operation;
node->canSetTag = canSetTag;
node->nominalRelation = nominalRelation;
+ node->partitioned_rels = partitioned_rels;
node->resultRelations = resultRelations;
node->resultRelIndex = -1; /* will be set correctly in setrefs.c */
node->plans = subplans;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 02286d9c52..cbdea1f537 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -212,6 +212,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
glob->finalrtable = NIL;
glob->finalrowmarks = NIL;
glob->resultRelations = NIL;
+ glob->nonleafResultRelations = NIL;
glob->relationOids = NIL;
glob->invalItems = NIL;
glob->nParamExec = 0;
@@ -380,6 +381,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
Assert(glob->finalrtable == NIL);
Assert(glob->finalrowmarks == NIL);
Assert(glob->resultRelations == NIL);
+ Assert(glob->nonleafResultRelations == NIL);
top_plan = set_plan_references(root, top_plan);
/* ... and the subplans (both regular subplans and initplans) */
Assert(list_length(glob->subplans) == list_length(glob->subroots));
@@ -405,6 +407,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
result->planTree = top_plan;
result->rtable = glob->finalrtable;
result->resultRelations = glob->resultRelations;
+ result->nonleafResultRelations = glob->nonleafResultRelations;
result->subplans = glob->subplans;
result->rewindPlanIDs = glob->rewindPlanIDs;
result->rowMarks = glob->finalrowmarks;
@@ -474,6 +477,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->multiexpr_params = NIL;
root->eq_classes = NIL;
root->append_rel_list = NIL;
+ root->pcinfo_list = NIL;
root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets));
@@ -1007,6 +1011,8 @@ inheritance_planner(PlannerInfo *root)
RelOptInfo *final_rel;
ListCell *lc;
Index rti;
+ RangeTblEntry *parent_rte;
+ List *partitioned_rels = NIL;
Assert(parse->commandType != CMD_INSERT);
@@ -1065,13 +1071,24 @@ inheritance_planner(PlannerInfo *root)
}
/*
+ * If the parent RTE is a partitioned table, we should use that as the
+ * nominal relation, because the RTEs added for partitioned tables
+ * (including the root parent) as child members of the inheritance set
+ * do not appear anywhere else in the plan. The situation is exactly
+ * the opposite in the case of non-partitioned inheritance parent as
+ * described below.
+ */
+ parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
+ if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
+ nominalRelation = parentRTindex;
+
+ /*
* And now we can get on with generating a plan for each child table.
*/
foreach(lc, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
PlannerInfo *subroot;
- RangeTblEntry *parent_rte;
RangeTblEntry *child_rte;
RelOptInfo *sub_final_rel;
Path *subpath;
@@ -1216,15 +1233,25 @@ inheritance_planner(PlannerInfo *root)
grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
/*
- * We'll use the first child relation (even if it's excluded) as the
- * nominal target relation of the ModifyTable node. Because of the
- * way expand_inherited_rtentry works, this should always be the RTE
- * representing the parent table in its role as a simple member of the
- * inheritance set. (It would be logically cleaner to use the
- * inheritance parent RTE as the nominal target; but since that RTE
- * will not be otherwise referenced in the plan, doing so would give
- * rise to confusing use of multiple aliases in EXPLAIN output for
- * what the user will think is the "same" table.)
+ * Set the nomimal target relation of the ModifyTable node if not
+ * already done. We use the inheritance parent RTE as the nominal
+ * target relation if it's a partitioned table (see just above this
+ * loop). In the non-partitioned parent case, we'll use the first
+ * child relation (even if it's excluded) as the nominal target
+ * relation. Because of the way expand_inherited_rtentry works, the
+ * latter should be the RTE representing the parent table in its role
+ * as a simple member of the inheritance set.
+ *
+ * It would be logically cleaner to *always* use the inheritance
+ * parent RTE as the nominal relation; but that RTE is not otherwise
+ * referenced in the plan in the non-partitioned inheritance case.
+ * Instead the duplicate child RTE created by expand_inherited_rtentry
+ * is used elsewhere in the plan, so using the original parent RTE
+ * would give rise to confusing use of multiple aliases in EXPLAIN
+ * output for what the user will think is the "same" table. OTOH,
+ * it's not a problem in the partitioned inheritance case, because
+ * the duplicate child RTE added for the parent does not appear
+ * anywhere else in the plan tree.
*/
if (nominalRelation < 0)
nominalRelation = appinfo->child_relid;
@@ -1298,6 +1325,13 @@ inheritance_planner(PlannerInfo *root)
Assert(!parse->onConflict);
}
+ if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
+ /* The root partitioned table is included as a child rel */
+ Assert(list_length(partitioned_rels) >= 1);
+ }
+
/* Result path must go into outer query's FINAL upperrel */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
@@ -1351,6 +1385,7 @@ inheritance_planner(PlannerInfo *root)
parse->commandType,
parse->canSetTag,
nominalRelation,
+ partitioned_rels,
resultRelations,
subpaths,
subroots,
@@ -2046,6 +2081,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
parse->commandType,
parse->canSetTag,
parse->resultRelation,
+ NIL,
list_make1_int(parse->resultRelation),
list_make1(path),
list_make1(root),
@@ -3348,7 +3384,8 @@ create_grouping_paths(PlannerInfo *root,
create_append_path(grouped_rel,
paths,
NULL,
- 0);
+ 0,
+ NIL);
path->pathtarget = target;
}
else
@@ -5470,3 +5507,33 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
+
+/*
+ * get_partitioned_child_rels
+ * Returns a list of the RT indexes of the partitioned child relations
+ * with rti as the root parent RT index.
+ *
+ * Note: Only call this function on RTEs known to be partitioned tables.
+ */
+List *
+get_partitioned_child_rels(PlannerInfo *root, Index rti)
+{
+ List *result = NIL;
+ ListCell *l;
+
+ foreach(l, root->pcinfo_list)
+ {
+ PartitionedChildRelInfo *pc = lfirst(l);
+
+ if (pc->parent_relid == rti)
+ {
+ result = pc->child_rels;
+ break;
+ }
+ }
+
+ /* The root partitioned table is included as a child rel */
+ Assert(list_length(result) >= 1);
+
+ return result;
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 5f3027e96f..5930747eba 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -835,6 +835,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
splan->nominalRelation += rtoffset;
splan->exclRelRTI += rtoffset;
+ foreach(l, splan->partitioned_rels)
+ {
+ lfirst_int(l) += rtoffset;
+ }
foreach(l, splan->resultRelations)
{
lfirst_int(l) += rtoffset;
@@ -863,6 +867,15 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
root->glob->resultRelations =
list_concat(root->glob->resultRelations,
list_copy(splan->resultRelations));
+
+ /*
+ * If the main target relation is a partitioned table, the
+ * following list contains the RT indexes of partitioned child
+ * relations, which are not included in the above list.
+ */
+ root->glob->nonleafResultRelations =
+ list_concat(root->glob->nonleafResultRelations,
+ list_copy(splan->partitioned_rels));
}
break;
case T_Append:
@@ -875,6 +888,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
*/
set_dummy_tlist_references(plan, rtoffset);
Assert(splan->plan.qual == NIL);
+ foreach(l, splan->partitioned_rels)
+ {
+ lfirst_int(l) += rtoffset;
+ }
foreach(l, splan->appendplans)
{
lfirst(l) = set_plan_refs(root,
@@ -893,6 +910,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
*/
set_dummy_tlist_references(plan, rtoffset);
Assert(splan->plan.qual == NIL);
+ foreach(l, splan->partitioned_rels)
+ {
+ lfirst_int(l) += rtoffset;
+ }
foreach(l, splan->mergeplans)
{
lfirst(l) = set_plan_refs(root,
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 1389db18ba..d88738ec7c 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -566,7 +566,7 @@ generate_union_path(SetOperationStmt *op, PlannerInfo *root,
/*
* Append the child results together.
*/
- path = (Path *) create_append_path(result_rel, pathlist, NULL, 0);
+ path = (Path *) create_append_path(result_rel, pathlist, NULL, 0, NIL);
/* We have to manually jam the right tlist into the path; ick */
path->pathtarget = create_pathtarget(root, tlist);
@@ -678,7 +678,7 @@ generate_nonunion_path(SetOperationStmt *op, PlannerInfo *root,
/*
* Append the child results together.
*/
- path = (Path *) create_append_path(result_rel, pathlist, NULL, 0);
+ path = (Path *) create_append_path(result_rel, pathlist, NULL, 0, NIL);
/* We have to manually jam the right tlist into the path; ick */
path->pathtarget = create_pathtarget(root, tlist);
@@ -1364,6 +1364,9 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
List *inhOIDs;
List *appinfos;
ListCell *l;
+ bool need_append;
+ PartitionedChildRelInfo *pcinfo;
+ List *partitioned_child_rels = NIL;
/* Does RT entry allow inheritance? */
if (!rte->inh)
@@ -1435,6 +1438,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* Scan the inheritance set and expand it */
appinfos = NIL;
+ need_append = false;
foreach(l, inhOIDs)
{
Oid childOID = lfirst_oid(l);
@@ -1483,36 +1487,46 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
childRTindex = list_length(parse->rtable);
/*
- * Build an AppendRelInfo for this parent and child.
+ * Build an AppendRelInfo for this parent and child, unless the child
+ * is a partitioned table.
*/
- appinfo = makeNode(AppendRelInfo);
- appinfo->parent_relid = rti;
- appinfo->child_relid = childRTindex;
- appinfo->parent_reltype = oldrelation->rd_rel->reltype;
- appinfo->child_reltype = newrelation->rd_rel->reltype;
- make_inh_translation_list(oldrelation, newrelation, childRTindex,
- &appinfo->translated_vars);
- appinfo->parent_reloid = parentOID;
- appinfos = lappend(appinfos, appinfo);
-
- /*
- * Translate the column permissions bitmaps to the child's attnums (we
- * have to build the translated_vars list before we can do this). But
- * if this is the parent table, leave copyObject's result alone.
- *
- * Note: we need to do this even though the executor won't run any
- * permissions checks on the child RTE. The insertedCols/updatedCols
- * bitmaps may be examined for trigger-firing purposes.
- */
- if (childOID != parentOID)
+ if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
{
- childrte->selectedCols = translate_col_privs(rte->selectedCols,
+ need_append = true;
+ appinfo = makeNode(AppendRelInfo);
+ appinfo->parent_relid = rti;
+ appinfo->child_relid = childRTindex;
+ appinfo->parent_reltype = oldrelation->rd_rel->reltype;
+ appinfo->child_reltype = newrelation->rd_rel->reltype;
+ make_inh_translation_list(oldrelation, newrelation, childRTindex,
+ &appinfo->translated_vars);
+ appinfo->parent_reloid = parentOID;
+ appinfos = lappend(appinfos, appinfo);
+
+ /*
+ * Translate the column permissions bitmaps to the child's attnums
+ * (we have to build the translated_vars list before we can do
+ * this). But if this is the parent table, leave copyObject's
+ * result alone.
+ *
+ * Note: we need to do this even though the executor won't run any
+ * permissions checks on the child RTE. The
+ * insertedCols/updatedCols bitmaps may be examined for
+ * trigger-firing purposes.
+ */
+ if (childOID != parentOID)
+ {
+ childrte->selectedCols = translate_col_privs(rte->selectedCols,
appinfo->translated_vars);
- childrte->insertedCols = translate_col_privs(rte->insertedCols,
+ childrte->insertedCols = translate_col_privs(rte->insertedCols,
appinfo->translated_vars);
- childrte->updatedCols = translate_col_privs(rte->updatedCols,
+ childrte->updatedCols = translate_col_privs(rte->updatedCols,
appinfo->translated_vars);
+ }
}
+ else
+ partitioned_child_rels = lappend_int(partitioned_child_rels,
+ childRTindex);
/*
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
@@ -1529,7 +1543,13 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
newrc->allMarkTypes = (1 << newrc->markType);
newrc->strength = oldrc->strength;
newrc->waitPolicy = oldrc->waitPolicy;
- newrc->isParent = false;
+
+ /*
+ * We mark RowMarks for partitioned child tables as parent RowMarks
+ * so that the executor ignores them (except their existence means
+ * that the child tables be locked using appropriate mode).
+ */
+ newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
/* Include child's rowmark type in parent's allMarkTypes */
oldrc->allMarkTypes |= newrc->allMarkTypes;
@@ -1545,18 +1565,37 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
heap_close(oldrelation, NoLock);
/*
- * If all the children were temp tables, pretend it's a non-inheritance
- * situation. The duplicate RTE we added for the parent table is
- * harmless, so we don't bother to get rid of it; ditto for the useless
- * PlanRowMark node.
+ * If all the children were temp tables or a partitioned parent did not
+ * have any leaf partitions, pretend it's a non-inheritance situation; we
+ * don't need Append node in that case. The duplicate RTE we added for
+ * the parent table is harmless, so we don't bother to get rid of it;
+ * ditto for the useless PlanRowMark node.
*/
- if (list_length(appinfos) < 2)
+ if (!need_append)
{
/* Clear flag before returning */
rte->inh = false;
return;
}
+ /*
+ * We keep a list of objects in root, each of which maps a partitioned
+ * parent RT index to the list of RT indexes of its partitioned child
+ * tables. When creating an Append or a ModifyTable path for the parent,
+ * we copy the child RT index list verbatim to the path so that it could
+ * be carried over to the executor so that the latter could identify
+ * the partitioned child tables.
+ */
+ if (partitioned_child_rels != NIL)
+ {
+ pcinfo = makeNode(PartitionedChildRelInfo);
+
+ Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
+ pcinfo->parent_relid = rti;
+ pcinfo->child_rels = partitioned_child_rels;
+ root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
+ }
+
/* Otherwise, OK to add to root->append_rel_list */
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 8ce772d274..fca96eb001 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1201,7 +1201,7 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
*/
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
- int parallel_workers)
+ int parallel_workers, List *partitioned_rels)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
@@ -1216,6 +1216,7 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
pathnode->path.parallel_workers = parallel_workers;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
+ pathnode->partitioned_rels = partitioned_rels;
pathnode->subpaths = subpaths;
/*
@@ -1258,7 +1259,8 @@ create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys,
- Relids required_outer)
+ Relids required_outer,
+ List *partitioned_rels)
{
MergeAppendPath *pathnode = makeNode(MergeAppendPath);
Cost input_startup_cost;
@@ -1274,6 +1276,7 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = 0;
pathnode->path.pathkeys = pathkeys;
+ pathnode->partitioned_rels = partitioned_rels;
pathnode->subpaths = subpaths;
/*
@@ -3105,7 +3108,7 @@ create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
ModifyTablePath *
create_modifytable_path(PlannerInfo *root, RelOptInfo *rel,
CmdType operation, bool canSetTag,
- Index nominalRelation,
+ Index nominalRelation, List *partitioned_rels,
List *resultRelations, List *subpaths,
List *subroots,
List *withCheckOptionLists, List *returningLists,
@@ -3172,6 +3175,7 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel,
pathnode->operation = operation;
pathnode->canSetTag = canSetTag;
pathnode->nominalRelation = nominalRelation;
+ pathnode->partitioned_rels = partitioned_rels;
pathnode->resultRelations = resultRelations;
pathnode->subpaths = subpaths;
pathnode->subroots = subroots;
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index fa02374811..d284ab7d3d 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -1514,7 +1514,8 @@ AcquireExecutorLocks(List *stmt_list, bool acquire)
* fail if it's been dropped entirely --- we'll just transiently
* acquire a non-conflicting lock.
*/
- if (list_member_int(plannedstmt->resultRelations, rt_index))
+ if (list_member_int(plannedstmt->resultRelations, rt_index) ||
+ list_member_int(plannedstmt->nonleafResultRelations, rt_index))
lockmode = RowExclusiveLock;
else if ((rc = get_plan_rowmark(plannedstmt->rowMarks, rt_index)) != NULL &&
RowMarkRequiresRowShareLock(rc->markType))
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 02dbe7b228..e64d6fb93f 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -375,6 +375,7 @@ extern void RegisterExprContextCallback(ExprContext *econtext,
extern void UnregisterExprContextCallback(ExprContext *econtext,
ExprContextCallbackFunction function,
Datum arg);
+extern void ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate);
/*
* prototypes from functions in execIndexing.c
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 2bc7a5df11..2cbd6d77b8 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -275,6 +275,7 @@ typedef enum NodeTag
T_PlaceHolderVar,
T_SpecialJoinInfo,
T_AppendRelInfo,
+ T_PartitionedChildRelInfo,
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index b880dc16cf..4a95e16b69 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -65,6 +65,9 @@ typedef struct PlannedStmt
/* rtable indexes of target relations for INSERT/UPDATE/DELETE */
List *resultRelations; /* integer list of RT indexes, or NIL */
+ /* rtable indexes of non-leaf target relations for INSERT/UPDATE/DELETE */
+ List *nonleafResultRelations;
+
List *subplans; /* Plan trees for SubPlan expressions */
Bitmapset *rewindPlanIDs; /* indices of subplans that require REWIND */
@@ -202,6 +205,8 @@ typedef struct ModifyTable
CmdType operation; /* INSERT, UPDATE, or DELETE */
bool canSetTag; /* do we set the command tag/es_processed? */
Index nominalRelation; /* Parent RT index for use of EXPLAIN */
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *resultRelations; /* integer list of RT indexes */
int resultRelIndex; /* index of first resultRel in plan's list */
List *plans; /* plan(s) producing source data */
@@ -227,6 +232,8 @@ typedef struct ModifyTable
typedef struct Append
{
Plan plan;
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *appendplans;
} Append;
@@ -238,6 +245,8 @@ typedef struct Append
typedef struct MergeAppend
{
Plan plan;
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *mergeplans;
/* remaining fields are just like the sort-key info in struct Sort */
int numCols; /* number of sort-key columns */
@@ -937,11 +946,12 @@ typedef enum RowMarkType
* When the planner discovers that a relation is the root of an inheritance
* tree, it sets isParent true, and adds an additional PlanRowMark to the
* list for each child relation (including the target rel itself in its role
- * as a child). The child entries have rti == child rel's RT index and
- * prti == parent's RT index, and can therefore be recognized as children by
- * the fact that prti != rti. The parent's allMarkTypes field gets the OR
- * of (1<<markType) across all its children (this definition allows children
- * to use different markTypes).
+ * as a child). isParent is also set to true for the partitioned child
+ * relations, which are not scanned just like the root parent. The child
+ * entries have rti == child rel's RT index and prti == parent's RT index,
+ * and can therefore be recognized as children by the fact that prti != rti.
+ * The parent's allMarkTypes field gets the OR of (1<<markType) across all
+ * its children (this definition allows children to use different markTypes).
*
* The planner also adds resjunk output columns to the plan that carry
* information sufficient to identify the locked or fetched rows. When
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 05d6f07aea..1c88a79a21 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -107,6 +107,8 @@ typedef struct PlannerGlobal
List *resultRelations; /* "flat" list of integer RT indexes */
+ List *nonleafResultRelations; /* "flat" list of integer RT indexes */
+
List *relationOids; /* OIDs of relations the plan depends on */
List *invalItems; /* other dependencies, as PlanInvalItems */
@@ -248,6 +250,8 @@ typedef struct PlannerInfo
List *append_rel_list; /* list of AppendRelInfos */
+ List *pcinfo_list; /* list of PartitionedChildRelInfos */
+
List *rowMarks; /* list of PlanRowMarks */
List *placeholder_list; /* list of PlaceHolderInfos */
@@ -1116,6 +1120,8 @@ typedef struct CustomPath
typedef struct AppendPath
{
Path path;
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *subpaths; /* list of component Paths */
} AppendPath;
@@ -1134,6 +1140,8 @@ typedef struct AppendPath
typedef struct MergeAppendPath
{
Path path;
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *subpaths; /* list of component Paths */
double limit_tuples; /* hard limit on output tuples, or -1 */
} MergeAppendPath;
@@ -1482,6 +1490,8 @@ typedef struct ModifyTablePath
CmdType operation; /* INSERT, UPDATE, or DELETE */
bool canSetTag; /* do we set the command tag/es_processed? */
Index nominalRelation; /* Parent RT index for use of EXPLAIN */
+ /* RT indexes of non-leaf tables in a partition tree */
+ List *partitioned_rels;
List *resultRelations; /* integer list of RT indexes */
List *subpaths; /* Path(s) producing source data */
List *subroots; /* per-target-table PlannerInfos */
@@ -1836,10 +1846,10 @@ typedef struct SpecialJoinInfo
*
* When we expand an inheritable table or a UNION-ALL subselect into an
* "append relation" (essentially, a list of child RTEs), we build an
- * AppendRelInfo for each child RTE. The list of AppendRelInfos indicates
- * which child RTEs must be included when expanding the parent, and each
- * node carries information needed to translate Vars referencing the parent
- * into Vars referencing that child.
+ * AppendRelInfo for each non-partitioned child RTE. The list of
+ * AppendRelInfos indicates which child RTEs must be included when expanding
+ * the parent, and each node carries information needed to translate Vars
+ * referencing the parent into Vars referencing that child.
*
* These structs are kept in the PlannerInfo node's append_rel_list.
* Note that we just throw all the structs into one list, and scan the
@@ -1914,6 +1924,25 @@ typedef struct AppendRelInfo
} AppendRelInfo;
/*
+ * For a partitioned table, this maps its RT index to the list of RT indexes
+ * of the partitioned child tables in the partition tree. We need to
+ * separately store this information, because we do not create AppendRelInfos
+ * for the partitioned child tables of a parent table, since AppendRelInfos
+ * contain information that is unnecessary for the partitioned child tables.
+ * The child_rels list must contain at least one element, because the parent
+ * partitioned table is itself counted as a child.
+ *
+ * These structs are kept in the PlannerInfo node's pcinfo_list.
+ */
+typedef struct PartitionedChildRelInfo
+{
+ NodeTag type;
+
+ Index parent_relid;
+ List *child_rels;
+} PartitionedChildRelInfo;
+
+/*
* For each distinct placeholder expression generated during planning, we
* store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list.
* This stores info that is needed centrally rather than in each copy of the
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 373c7221a8..81640de7ab 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -64,12 +64,14 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
List *tidquals, Relids required_outer);
extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths,
- Relids required_outer, int parallel_workers);
+ Relids required_outer, int parallel_workers,
+ List *partitioned_rels);
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
List *pathkeys,
- Relids required_outer);
+ Relids required_outer,
+ List *partitioned_rels);
extern ResultPath *create_result_path(PlannerInfo *root, RelOptInfo *rel,
PathTarget *target, List *resconstantqual);
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
@@ -232,7 +234,7 @@ extern LockRowsPath *create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
extern ModifyTablePath *create_modifytable_path(PlannerInfo *root,
RelOptInfo *rel,
CmdType operation, bool canSetTag,
- Index nominalRelation,
+ Index nominalRelation, List *partitioned_rels,
List *resultRelations, List *subpaths,
List *subroots,
List *withCheckOptionLists, List *returningLists,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 6922933e36..f3aaa23975 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -57,4 +57,6 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
+extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti);
+
#endif /* PLANNER_H */
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 6494b205c4..6163ed8117 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -588,6 +588,45 @@ select tableoid::regclass::text as relname, bar.* from bar order by 1,2;
bar2 | 4 | 104
(8 rows)
+-- Check UPDATE with *partitioned* inherited target and an appendrel subquery
+create table some_tab (a int);
+insert into some_tab values (0);
+create table some_tab_child () inherits (some_tab);
+insert into some_tab_child values (1);
+create table parted_tab (a int, b char) partition by list (a);
+create table parted_tab_part1 partition of parted_tab for values in (1);
+create table parted_tab_part2 partition of parted_tab for values in (2);
+create table parted_tab_part3 partition of parted_tab for values in (3);
+insert into parted_tab values (1, 'a'), (2, 'a'), (3, 'a');
+update parted_tab set b = 'b'
+from
+ (select a from some_tab union all select a+1 from some_tab) ss (a)
+where parted_tab.a = ss.a;
+select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
+ relname | a | b
+------------------+---+---
+ parted_tab_part1 | 1 | b
+ parted_tab_part2 | 2 | b
+ parted_tab_part3 | 3 | a
+(3 rows)
+
+truncate parted_tab;
+insert into parted_tab values (1, 'a'), (2, 'a'), (3, 'a');
+update parted_tab set b = 'b'
+from
+ (select 0 from parted_tab union all select 1 from parted_tab) ss (a)
+where parted_tab.a = ss.a;
+select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
+ relname | a | b
+------------------+---+---
+ parted_tab_part1 | 1 | b
+ parted_tab_part2 | 2 | a
+ parted_tab_part3 | 3 | a
+(3 rows)
+
+drop table parted_tab;
+drop table some_tab cascade;
+NOTICE: drop cascades to table some_tab_child
/* Test multiple inheritance of column defaults */
CREATE TABLE firstparent (tomorrow date default now()::date + 1);
CREATE TABLE secondparent (tomorrow date default now() :: date + 1);
@@ -1611,71 +1650,60 @@ explain (costs off) select * from list_parted;
QUERY PLAN
--------------------------------
Append
- -> Seq Scan on list_parted
-> Seq Scan on part_ab_cd
-> Seq Scan on part_ef_gh
-> Seq Scan on part_null_xy
-(5 rows)
+(4 rows)
explain (costs off) select * from list_parted where a is null;
QUERY PLAN
--------------------------------
Append
- -> Seq Scan on list_parted
- Filter: (a IS NULL)
-> Seq Scan on part_null_xy
Filter: (a IS NULL)
-(5 rows)
+(3 rows)
explain (costs off) select * from list_parted where a is not null;
QUERY PLAN
---------------------------------
Append
- -> Seq Scan on list_parted
- Filter: (a IS NOT NULL)
-> Seq Scan on part_ab_cd
Filter: (a IS NOT NULL)
-> Seq Scan on part_ef_gh
Filter: (a IS NOT NULL)
-> Seq Scan on part_null_xy
Filter: (a IS NOT NULL)
-(9 rows)
+(7 rows)
explain (costs off) select * from list_parted where a in ('ab', 'cd', 'ef');
QUERY PLAN
----------------------------------------------------------
Append
- -> Seq Scan on list_parted
- Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
-> Seq Scan on part_ab_cd
Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
-> Seq Scan on part_ef_gh
Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
-(7 rows)
+(5 rows)
explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd');
QUERY PLAN
---------------------------------------------------------------------------------------
Append
- -> Seq Scan on list_parted
- Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-> Seq Scan on part_ab_cd
Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-> Seq Scan on part_ef_gh
Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-> Seq Scan on part_null_xy
Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
-(9 rows)
+(7 rows)
explain (costs off) select * from list_parted where a = 'ab';
QUERY PLAN
------------------------------------------
Append
- -> Seq Scan on list_parted
- Filter: ((a)::text = 'ab'::text)
-> Seq Scan on part_ab_cd
Filter: ((a)::text = 'ab'::text)
-(5 rows)
+(3 rows)
create table range_list_parted (
a int,
@@ -1695,14 +1723,9 @@ create table part_40_inf_ab partition of part_40_inf for values in ('ab');
create table part_40_inf_cd partition of part_40_inf for values in ('cd');
create table part_40_inf_null partition of part_40_inf for values in (null);
explain (costs off) select * from range_list_parted;
- QUERY PLAN
--------------------------------------
+ QUERY PLAN
+------------------------------------
Append
- -> Seq Scan on range_list_parted
- -> Seq Scan on part_1_10
- -> Seq Scan on part_10_20
- -> Seq Scan on part_21_30
- -> Seq Scan on part_40_inf
-> Seq Scan on part_1_10_ab
-> Seq Scan on part_1_10_cd
-> Seq Scan on part_10_20_ab
@@ -1712,36 +1735,22 @@ explain (costs off) select * from range_list_parted;
-> Seq Scan on part_40_inf_ab
-> Seq Scan on part_40_inf_cd
-> Seq Scan on part_40_inf_null
-(15 rows)
+(10 rows)
explain (costs off) select * from range_list_parted where a = 5;
- QUERY PLAN
--------------------------------------
+ QUERY PLAN
+--------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: (a = 5)
- -> Seq Scan on part_1_10
- Filter: (a = 5)
-> Seq Scan on part_1_10_ab
Filter: (a = 5)
-> Seq Scan on part_1_10_cd
Filter: (a = 5)
-(9 rows)
+(5 rows)
explain (costs off) select * from range_list_parted where b = 'ab';
- QUERY PLAN
--------------------------------------
+ QUERY PLAN
+------------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: (b = 'ab'::bpchar)
- -> Seq Scan on part_1_10
- Filter: (b = 'ab'::bpchar)
- -> Seq Scan on part_10_20
- Filter: (b = 'ab'::bpchar)
- -> Seq Scan on part_21_30
- Filter: (b = 'ab'::bpchar)
- -> Seq Scan on part_40_inf
- Filter: (b = 'ab'::bpchar)
-> Seq Scan on part_1_10_ab
Filter: (b = 'ab'::bpchar)
-> Seq Scan on part_10_20_ab
@@ -1750,27 +1759,19 @@ explain (costs off) select * from range_list_parted where b = 'ab';
Filter: (b = 'ab'::bpchar)
-> Seq Scan on part_40_inf_ab
Filter: (b = 'ab'::bpchar)
-(19 rows)
+(9 rows)
explain (costs off) select * from range_list_parted where a between 3 and 23 and b in ('ab');
QUERY PLAN
-----------------------------------------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
- -> Seq Scan on part_1_10
- Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
- -> Seq Scan on part_10_20
- Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
- -> Seq Scan on part_21_30
- Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
-> Seq Scan on part_1_10_ab
Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
-> Seq Scan on part_10_20_ab
Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
-> Seq Scan on part_21_30_ab
Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
-(15 rows)
+(7 rows)
/* Should select no rows because range partition key cannot be null */
explain (costs off) select * from range_list_parted where a is null;
@@ -1782,37 +1783,17 @@ explain (costs off) select * from range_list_parted where a is null;
/* Should only select rows from the null-accepting partition */
explain (costs off) select * from range_list_parted where b is null;
- QUERY PLAN
--------------------------------------
+ QUERY PLAN
+------------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: (b IS NULL)
- -> Seq Scan on part_1_10
- Filter: (b IS NULL)
- -> Seq Scan on part_10_20
- Filter: (b IS NULL)
- -> Seq Scan on part_21_30
- Filter: (b IS NULL)
- -> Seq Scan on part_40_inf
- Filter: (b IS NULL)
-> Seq Scan on part_40_inf_null
Filter: (b IS NULL)
-(13 rows)
+(3 rows)
explain (costs off) select * from range_list_parted where a is not null and a < 67;
QUERY PLAN
------------------------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: ((a IS NOT NULL) AND (a < 67))
- -> Seq Scan on part_1_10
- Filter: ((a IS NOT NULL) AND (a < 67))
- -> Seq Scan on part_10_20
- Filter: ((a IS NOT NULL) AND (a < 67))
- -> Seq Scan on part_21_30
- Filter: ((a IS NOT NULL) AND (a < 67))
- -> Seq Scan on part_40_inf
- Filter: ((a IS NOT NULL) AND (a < 67))
-> Seq Scan on part_1_10_ab
Filter: ((a IS NOT NULL) AND (a < 67))
-> Seq Scan on part_1_10_cd
@@ -1831,23 +1812,19 @@ explain (costs off) select * from range_list_parted where a is not null and a <
Filter: ((a IS NOT NULL) AND (a < 67))
-> Seq Scan on part_40_inf_null
Filter: ((a IS NOT NULL) AND (a < 67))
-(29 rows)
+(19 rows)
explain (costs off) select * from range_list_parted where a >= 30;
- QUERY PLAN
--------------------------------------
+ QUERY PLAN
+------------------------------------
Append
- -> Seq Scan on range_list_parted
- Filter: (a >= 30)
- -> Seq Scan on part_40_inf
- Filter: (a >= 30)
-> Seq Scan on part_40_inf_ab
Filter: (a >= 30)
-> Seq Scan on part_40_inf_cd
Filter: (a >= 30)
-> Seq Scan on part_40_inf_null
Filter: (a >= 30)
-(11 rows)
+(7 rows)
drop table list_parted;
drop table range_list_parted;
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out
index b18e420e9b..d3794140fb 100644
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -322,12 +322,10 @@ explain (costs off)
QUERY PLAN
-------------------------------------------
Append
- -> Sample Scan on parted_sample
- Sampling: bernoulli ('100'::real)
-> Sample Scan on parted_sample_1
Sampling: bernoulli ('100'::real)
-> Sample Scan on parted_sample_2
Sampling: bernoulli ('100'::real)
-(7 rows)
+(5 rows)
drop table parted_sample, parted_sample_1, parted_sample_2;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index e3e9e34895..d43b75c4a7 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -128,6 +128,34 @@ where bar.f1 = ss.f1;
select tableoid::regclass::text as relname, bar.* from bar order by 1,2;
+-- Check UPDATE with *partitioned* inherited target and an appendrel subquery
+create table some_tab (a int);
+insert into some_tab values (0);
+create table some_tab_child () inherits (some_tab);
+insert into some_tab_child values (1);
+create table parted_tab (a int, b char) partition by list (a);
+create table parted_tab_part1 partition of parted_tab for values in (1);
+create table parted_tab_part2 partition of parted_tab for values in (2);
+create table parted_tab_part3 partition of parted_tab for values in (3);
+insert into parted_tab values (1, 'a'), (2, 'a'), (3, 'a');
+
+update parted_tab set b = 'b'
+from
+ (select a from some_tab union all select a+1 from some_tab) ss (a)
+where parted_tab.a = ss.a;
+select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
+
+truncate parted_tab;
+insert into parted_tab values (1, 'a'), (2, 'a'), (3, 'a');
+update parted_tab set b = 'b'
+from
+ (select 0 from parted_tab union all select 1 from parted_tab) ss (a)
+where parted_tab.a = ss.a;
+select tableoid::regclass::text as relname, parted_tab.* from parted_tab order by 1,2;
+
+drop table parted_tab;
+drop table some_tab cascade;
+
/* Test multiple inheritance of column defaults */
CREATE TABLE firstparent (tomorrow date default now()::date + 1);