summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2023-01-09 17:15:08 +1300
committerDavid Rowley <drowley@postgresql.org>2023-01-09 17:15:08 +1300
commit3c569049b7b502bb4952483d19ce622ff0af5fd6 (patch)
tree7ce433043ae025e93ed62e5a763d1ed01942f0c4 /src
parent216a784829c2c5f03ab0c43e009126cbb819e9b2 (diff)
downloadpostgresql-3c569049b7b502bb4952483d19ce622ff0af5fd6.tar.gz
Allow left join removals and unique joins on partitioned tables
This allows left join removals and unique joins to work with partitioned tables. The planner just lacked sufficient proofs that a given join would not cause any row duplication. Unique indexes currently serve as that proof, so have get_relation_info() populate the indexlist for partitioned tables too. Author: Arne Roland Reviewed-by: Alvaro Herrera, Zhihong Yu, Amit Langote, David Rowley Discussion: https://postgr.es/m/c3b2408b7a39433b8230bbcd02e9f302@index.de
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/util/plancat.c264
-rw-r--r--src/backend/utils/adt/selfuncs.c4
-rw-r--r--src/include/nodes/pathnodes.h10
-rw-r--r--src/test/regress/expected/join.out10
-rw-r--r--src/test/regress/expected/partition_join.out4
-rw-r--r--src/test/regress/sql/join.sql7
-rw-r--r--src/test/regress/sql/partition_join.sql4
7 files changed, 179 insertions, 124 deletions
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 9f158f2421..d58c4a1078 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -109,7 +109,9 @@ static void set_baserel_partition_constraint(Relation relation,
* If inhparent is true, all we need to do is set up the attr arrays:
* the RelOptInfo actually represents the appendrel formed by an inheritance
* tree, and so the parent rel's physical size and index information isn't
- * important for it.
+ * important for it, however, for partitioned tables, we do populate the
+ * indexlist as the planner uses unique indexes as unique proofs for certain
+ * optimizations.
*/
void
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
@@ -175,10 +177,14 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/*
* Make list of indexes. Ignore indexes on system catalogs if told to.
- * Don't bother with indexes for an inheritance parent, either.
+ * Don't bother with indexes from traditional inheritance parents. For
+ * partitioned tables, we need a list of at least unique indexes as these
+ * serve as unique proofs for certain planner optimizations. However,
+ * let's not discriminate here and just record all partitioned indexes
+ * whether they're unique indexes or not.
*/
- if (inhparent ||
- (IgnoreSystemIndexes && IsSystemRelation(relation)))
+ if ((inhparent && relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ || (IgnoreSystemIndexes && IsSystemRelation(relation)))
hasindex = false;
else
hasindex = relation->rd_rel->relhasindex;
@@ -232,16 +238,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
}
/*
- * Ignore partitioned indexes, since they are not usable for
- * queries.
- */
- if (indexRelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
- {
- index_close(indexRelation, NoLock);
- continue;
- }
-
- /*
* If the index is valid, but cannot yet be used, ignore it; but
* mark the plan we are generating as transient. See
* src/backend/access/heap/README.HOT for discussion.
@@ -285,105 +281,129 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->relam = indexRelation->rd_rel->relam;
- /* We copy just the fields we need, not all of rd_indam */
- amroutine = indexRelation->rd_indam;
- info->amcanorderbyop = amroutine->amcanorderbyop;
- info->amoptionalkey = amroutine->amoptionalkey;
- info->amsearcharray = amroutine->amsearcharray;
- info->amsearchnulls = amroutine->amsearchnulls;
- info->amcanparallel = amroutine->amcanparallel;
- info->amhasgettuple = (amroutine->amgettuple != NULL);
- info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
- relation->rd_tableam->scan_bitmap_next_block != NULL;
- info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
- amroutine->amrestrpos != NULL);
- info->amcostestimate = amroutine->amcostestimate;
- Assert(info->amcostestimate != NULL);
-
- /* Fetch index opclass options */
- info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
-
/*
- * Fetch the ordering information for the index, if any.
+ * We don't have an AM for partitioned indexes, so we'll just
+ * NULLify the AM related fields for those.
*/
- if (info->relam == BTREE_AM_OID)
+ if (indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
{
+ /* We copy just the fields we need, not all of rd_indam */
+ amroutine = indexRelation->rd_indam;
+ info->amcanorderbyop = amroutine->amcanorderbyop;
+ info->amoptionalkey = amroutine->amoptionalkey;
+ info->amsearcharray = amroutine->amsearcharray;
+ info->amsearchnulls = amroutine->amsearchnulls;
+ info->amcanparallel = amroutine->amcanparallel;
+ info->amhasgettuple = (amroutine->amgettuple != NULL);
+ info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
+ relation->rd_tableam->scan_bitmap_next_block != NULL;
+ info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
+ amroutine->amrestrpos != NULL);
+ info->amcostestimate = amroutine->amcostestimate;
+ Assert(info->amcostestimate != NULL);
+
+ /* Fetch index opclass options */
+ info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
+
/*
- * If it's a btree index, we can use its opfamily OIDs
- * directly as the sort ordering opfamily OIDs.
+ * Fetch the ordering information for the index, if any.
*/
- Assert(amroutine->amcanorder);
-
- info->sortopfamily = info->opfamily;
- info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
- info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
-
- for (i = 0; i < nkeycolumns; i++)
+ if (info->relam == BTREE_AM_OID)
{
- int16 opt = indexRelation->rd_indoption[i];
+ /*
+ * If it's a btree index, we can use its opfamily OIDs
+ * directly as the sort ordering opfamily OIDs.
+ */
+ Assert(amroutine->amcanorder);
- info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
- info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
- }
- }
- else if (amroutine->amcanorder)
- {
- /*
- * Otherwise, identify the corresponding btree opfamilies by
- * trying to map this index's "<" operators into btree. Since
- * "<" uniquely defines the behavior of a sort order, this is
- * a sufficient test.
- *
- * XXX This method is rather slow and also requires the
- * undesirable assumption that the other index AM numbers its
- * strategies the same as btree. It'd be better to have a way
- * to explicitly declare the corresponding btree opfamily for
- * each opfamily of the other index type. But given the lack
- * of current or foreseeable amcanorder index types, it's not
- * worth expending more effort on now.
- */
- info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns);
- info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
- info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
+ info->sortopfamily = info->opfamily;
+ info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
+ info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
- for (i = 0; i < nkeycolumns; i++)
- {
- int16 opt = indexRelation->rd_indoption[i];
- Oid ltopr;
- Oid btopfamily;
- Oid btopcintype;
- int16 btstrategy;
-
- info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
- info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
-
- ltopr = get_opfamily_member(info->opfamily[i],
- info->opcintype[i],
- info->opcintype[i],
- BTLessStrategyNumber);
- if (OidIsValid(ltopr) &&
- get_ordering_op_properties(ltopr,
- &btopfamily,
- &btopcintype,
- &btstrategy) &&
- btopcintype == info->opcintype[i] &&
- btstrategy == BTLessStrategyNumber)
+ for (i = 0; i < nkeycolumns; i++)
{
- /* Successful mapping */
- info->sortopfamily[i] = btopfamily;
+ int16 opt = indexRelation->rd_indoption[i];
+
+ info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
+ info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
}
- else
+ }
+ else if (amroutine->amcanorder)
+ {
+ /*
+ * Otherwise, identify the corresponding btree opfamilies
+ * by trying to map this index's "<" operators into btree.
+ * Since "<" uniquely defines the behavior of a sort
+ * order, this is a sufficient test.
+ *
+ * XXX This method is rather slow and also requires the
+ * undesirable assumption that the other index AM numbers
+ * its strategies the same as btree. It'd be better to
+ * have a way to explicitly declare the corresponding
+ * btree opfamily for each opfamily of the other index
+ * type. But given the lack of current or foreseeable
+ * amcanorder index types, it's not worth expending more
+ * effort on now.
+ */
+ info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns);
+ info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
+ info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
+
+ for (i = 0; i < nkeycolumns; i++)
{
- /* Fail ... quietly treat index as unordered */
- info->sortopfamily = NULL;
- info->reverse_sort = NULL;
- info->nulls_first = NULL;
- break;
+ int16 opt = indexRelation->rd_indoption[i];
+ Oid ltopr;
+ Oid btopfamily;
+ Oid btopcintype;
+ int16 btstrategy;
+
+ info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
+ info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
+
+ ltopr = get_opfamily_member(info->opfamily[i],
+ info->opcintype[i],
+ info->opcintype[i],
+ BTLessStrategyNumber);
+ if (OidIsValid(ltopr) &&
+ get_ordering_op_properties(ltopr,
+ &btopfamily,
+ &btopcintype,
+ &btstrategy) &&
+ btopcintype == info->opcintype[i] &&
+ btstrategy == BTLessStrategyNumber)
+ {
+ /* Successful mapping */
+ info->sortopfamily[i] = btopfamily;
+ }
+ else
+ {
+ /* Fail ... quietly treat index as unordered */
+ info->sortopfamily = NULL;
+ info->reverse_sort = NULL;
+ info->nulls_first = NULL;
+ break;
+ }
}
}
+ else
+ {
+ info->sortopfamily = NULL;
+ info->reverse_sort = NULL;
+ info->nulls_first = NULL;
+ }
}
else
{
+ info->amcanorderbyop = false;
+ info->amoptionalkey = false;
+ info->amsearcharray = false;
+ info->amsearchnulls = false;
+ info->amcanparallel = false;
+ info->amhasgettuple = false;
+ info->amhasgetbitmap = false;
+ info->amcanmarkpos = false;
+ info->amcostestimate = NULL;
+
info->sortopfamily = NULL;
info->reverse_sort = NULL;
info->nulls_first = NULL;
@@ -416,31 +436,45 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
* the number-of-tuples estimate to equal the parent table; if it
* is partial then we have to use the same methods as we would for
* a table, except we can be sure that the index is not larger
- * than the table.
+ * than the table. We must ignore partitioned indexes here as as
+ * there are not physical indexes.
*/
- if (info->indpred == NIL)
+ if (indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
{
- info->pages = RelationGetNumberOfBlocks(indexRelation);
- info->tuples = rel->tuples;
- }
- else
- {
- double allvisfrac; /* dummy */
-
- estimate_rel_size(indexRelation, NULL,
- &info->pages, &info->tuples, &allvisfrac);
- if (info->tuples > rel->tuples)
+ if (info->indpred == NIL)
+ {
+ info->pages = RelationGetNumberOfBlocks(indexRelation);
info->tuples = rel->tuples;
- }
+ }
+ else
+ {
+ double allvisfrac; /* dummy */
- if (info->relam == BTREE_AM_OID)
- {
- /* For btrees, get tree height while we have the index open */
- info->tree_height = _bt_getrootheight(indexRelation);
+ estimate_rel_size(indexRelation, NULL,
+ &info->pages, &info->tuples, &allvisfrac);
+ if (info->tuples > rel->tuples)
+ info->tuples = rel->tuples;
+ }
+
+ if (info->relam == BTREE_AM_OID)
+ {
+ /*
+ * For btrees, get tree height while we have the index
+ * open
+ */
+ info->tree_height = _bt_getrootheight(indexRelation);
+ }
+ else
+ {
+ /* For other index types, just set it to "unknown" for now */
+ info->tree_height = -1;
+ }
}
else
{
- /* For other index types, just set it to "unknown" for now */
+ /* Zero these out for partitioned indexes */
+ info->pages = 0;
+ info->tuples = 0.0;
info->tree_height = -1;
}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index f50e58adbd..57de51f0db 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5994,6 +5994,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
rte = root->simple_rte_array[rel->relid];
Assert(rte->rtekind == RTE_RELATION);
+ /* ignore partitioned tables. Any indexes here are not real indexes */
+ if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+ return false;
+
/* Search through the indexes to see if any match our problem */
foreach(lc, rel->indexlist)
{
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 1827e50647..c20b7298a3 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -653,7 +653,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
* lateral_referencers - relids of rels that reference this one laterally
* (includes both direct and indirect lateral references)
* indexlist - list of IndexOptInfo nodes for relation's indexes
- * (always NIL if it's not a table)
+ * (always NIL if it's not a table or partitioned table)
* pages - number of disk pages in relation (zero if not a table)
* tuples - number of tuples in relation (not considering restrictions)
* allvisfrac - fraction of disk pages that are marked all-visible
@@ -1097,11 +1097,11 @@ struct IndexOptInfo
Oid *opfamily pg_node_attr(array_size(nkeycolumns));
/* OIDs of opclass declared input data types */
Oid *opcintype pg_node_attr(array_size(nkeycolumns));
- /* OIDs of btree opfamilies, if orderable */
+ /* OIDs of btree opfamilies, if orderable. NULL if partitioned index */
Oid *sortopfamily pg_node_attr(array_size(nkeycolumns));
- /* is sort order descending? */
+ /* is sort order descending? or NULL if partitioned index */
bool *reverse_sort pg_node_attr(array_size(nkeycolumns));
- /* do NULLs come first in the sort order? */
+ /* do NULLs come first in the sort order? or NULL if partitioned index */
bool *nulls_first pg_node_attr(array_size(nkeycolumns));
/* opclass-specific options for columns */
bytea **opclassoptions pg_node_attr(read_write_ignore);
@@ -1139,7 +1139,7 @@ struct IndexOptInfo
/*
* Remaining fields are copied from the index AM's API struct
- * (IndexAmRoutine).
+ * (IndexAmRoutine). These fields are not set for partitioned indexes.
*/
bool amcanorderbyop;
bool amoptionalkey;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 3ddea3b683..c2b85d2795 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4860,6 +4860,16 @@ select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
Filter: (a.id = i)
(4 rows)
+CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
+CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
+-- test join removals on a partitioned table
+explain (costs off)
+select a.* from a left join parted_b pb on a.b_id = pb.id;
+ QUERY PLAN
+---------------
+ Seq Scan on a
+(1 row)
+
rollback;
create temp table parent (k int primary key, pd int);
create temp table child (k int unique, cd int);
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index c59caf1cb3..c649c4aeaa 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -4874,7 +4874,7 @@ ANALYZE fract_t;
SET max_parallel_workers_per_gather = 0;
SET enable_partitionwise_join = on;
EXPLAIN (COSTS OFF)
-SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
+SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id ASC LIMIT 10;
QUERY PLAN
-----------------------------------------------------------------------
Limit
@@ -4891,7 +4891,7 @@ SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
(11 rows)
EXPLAIN (COSTS OFF)
-SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
+SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10;
QUERY PLAN
--------------------------------------------------------------------------------
Limit
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 9fc6ef4376..027927354c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1709,6 +1709,13 @@ explain (costs off)
select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
lateral generate_series(1, q.id) gs(i) where q.id = gs.i;
+CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
+CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
+
+-- test join removals on a partitioned table
+explain (costs off)
+select a.* from a left join parted_b pb on a.b_id = pb.id;
+
rollback;
create temp table parent (k int primary key, pd int);
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index 67f506361f..9e16f1ca55 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -1157,10 +1157,10 @@ SET max_parallel_workers_per_gather = 0;
SET enable_partitionwise_join = on;
EXPLAIN (COSTS OFF)
-SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
+SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id ASC LIMIT 10;
EXPLAIN (COSTS OFF)
-SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
+SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10;
-- cleanup
DROP TABLE fract_t;