summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/joinpath.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2009-09-17 20:49:29 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2009-09-17 20:49:29 +0000
commit488d70ab46311386801c10691196ec8d755f2283 (patch)
tree043fc65616c8690113e2b5c18222bcd230197c9b /src/backend/optimizer/path/joinpath.c
parente3f027115a5a5109527238886bde7f6d5b4a5b96 (diff)
downloadpostgresql-488d70ab46311386801c10691196ec8d755f2283.tar.gz
Implement "join removal" for cases where the inner side of a left join
is unique and is not referenced above the join. In this case the inner side doesn't affect the query result and can be thrown away entirely. Although perhaps nobody would ever write such a thing by hand, it's a reasonably common case in machine-generated SQL. The current implementation only recognizes the case where the inner side is a simple relation with a unique index matching the query conditions. This is enough for the use-cases that have been shown so far, but we might want to try to handle other cases later. Robert Haas, somewhat rewritten by Tom
Diffstat (limited to 'src/backend/optimizer/path/joinpath.c')
-rw-r--r--src/backend/optimizer/path/joinpath.c196
1 files changed, 195 insertions, 1 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 269c4824b6..827e18a502 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.123 2009/09/12 22:12:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.124 2009/09/17 20:49:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,6 +22,11 @@
#include "optimizer/paths.h"
+static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ List *restrictlist, JoinType jointype);
+static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel);
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
@@ -79,10 +84,25 @@ add_paths_to_joinrel(PlannerInfo *root,
List *mergeclause_list = NIL;
/*
+ * 0. Consider join removal. This is always the most efficient strategy,
+ * so if it works, there's no need to consider anything further.
+ */
+ if (join_is_removable(root, joinrel, outerrel, innerrel,
+ restrictlist, jointype))
+ {
+ generate_outer_only(root, joinrel, outerrel);
+ return;
+ }
+
+ /*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin is currently our
* only way of implementing full outer joins, so override mergejoin
* disable if it's a full join.
+ *
+ * Note: do this after join_is_removable(), because this sets the
+ * outer_is_left flags in the mergejoin clauses, while join_is_removable
+ * uses those flags for its own purposes.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root,
@@ -134,6 +154,180 @@ add_paths_to_joinrel(PlannerInfo *root,
}
/*
+ * join_is_removable
+ * Determine whether we need not perform the join at all, because
+ * it will just duplicate its left input.
+ *
+ * This is true for a left join for which the join condition cannot match
+ * more than one inner-side row. (There are other possibly interesting
+ * cases, but we don't have the infrastructure to prove them.)
+ *
+ * Note: there is no need to consider the symmetrical case of duplicating the
+ * right input, because add_paths_to_joinrel() will be called with each rel
+ * on the outer side.
+ */
+static bool
+join_is_removable(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ List *restrictlist,
+ JoinType jointype)
+{
+ List *clause_list = NIL;
+ ListCell *l;
+ int attroff;
+
+ /*
+ * Currently, we only know how to remove left joins to a baserel with
+ * unique indexes. We can check most of these criteria pretty trivially
+ * to avoid doing useless extra work. But checking whether any of the
+ * indexes are unique would require iterating over the indexlist, so for
+ * now we just make sure there are indexes of some sort or other. If none
+ * of them are unique, join removal will still fail, just slightly later.
+ */
+ if (jointype != JOIN_LEFT ||
+ innerrel->reloptkind == RELOPT_JOINREL ||
+ innerrel->rtekind != RTE_RELATION ||
+ innerrel->indexlist == NIL)
+ return false;
+
+ /*
+ * We can't remove the join if any inner-rel attributes are used above
+ * the join.
+ *
+ * As a micro-optimization, it seems better to start with max_attr and
+ * count down rather than starting with min_attr and counting up, on the
+ * theory that the system attributes are somewhat less likely to be wanted
+ * and should be tested last.
+ */
+ for (attroff = innerrel->max_attr - innerrel->min_attr;
+ attroff >= 0;
+ attroff--)
+ {
+ if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
+ return false;
+ }
+
+ /*
+ * Search for mergejoinable clauses that constrain the inner rel against
+ * either the outer rel or a pseudoconstant. If an operator is
+ * mergejoinable then it behaves like equality for some btree opclass,
+ * so it's what we want. The mergejoinability test also eliminates
+ * clauses containing volatile functions, which we couldn't depend on.
+ */
+ foreach(l, restrictlist)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /*
+ * We are always considering an outer join here, so ignore pushed-down
+ * clauses. Also ignore anything that doesn't have a mergejoinable
+ * operator.
+ */
+ if (restrictinfo->is_pushed_down)
+ continue;
+
+ if (!restrictinfo->can_join ||
+ restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * Check if clause is usable with these input rels. All the vars
+ * needed on each side of the clause must be available from one or the
+ * other of the input rels.
+ */
+ if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
+ bms_is_subset(restrictinfo->right_relids, innerrel->relids))
+ {
+ /* righthand side is inner */
+ restrictinfo->outer_is_left = true;
+ }
+ else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
+ bms_is_subset(restrictinfo->right_relids, outerrel->relids))
+ {
+ /* lefthand side is inner */
+ restrictinfo->outer_is_left = false;
+ }
+ else
+ continue; /* no good for these input relations */
+
+ /* OK, add to list */
+ clause_list = lappend(clause_list, restrictinfo);
+ }
+
+ /* Now examine the rel's restriction clauses for var = const clauses */
+ foreach(l, innerrel->baserestrictinfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /*
+ * Note: can_join won't be set for a restriction clause, but
+ * mergeopfamilies will be if it has a mergejoinable operator
+ * and doesn't contain volatile functions.
+ */
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * The clause certainly doesn't refer to anything but the given
+ * rel. If either side is pseudoconstant then we can use it.
+ */
+ if (bms_is_empty(restrictinfo->left_relids))
+ {
+ /* righthand side is inner */
+ restrictinfo->outer_is_left = true;
+ }
+ else if (bms_is_empty(restrictinfo->right_relids))
+ {
+ /* lefthand side is inner */
+ restrictinfo->outer_is_left = false;
+ }
+ else
+ continue;
+
+ /* OK, add to list */
+ clause_list = lappend(clause_list, restrictinfo);
+ }
+
+ /* Now examine the indexes to see if we have a matching unique index */
+ if (relation_has_unique_index_for(root, innerrel, clause_list))
+ return true;
+
+ /*
+ * Some day it would be nice to check for other methods of establishing
+ * distinctness.
+ */
+ return false;
+}
+
+/*
+ * generate_outer_only
+ * Generate "join" paths when we have found the join is removable.
+ */
+static void
+generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel)
+{
+ ListCell *lc;
+
+ /*
+ * For the moment, replicate all of the outerrel's paths as join paths.
+ * Some of them might not really be interesting above the join, if they
+ * have sort orderings that have no real use except to do a mergejoin
+ * for the join we've just found we don't need. But distinguishing that
+ * case probably isn't worth the extra code it would take.
+ */
+ foreach(lc, outerrel->pathlist)
+ {
+ Path *outerpath = (Path *) lfirst(lc);
+
+ add_path(joinrel, (Path *)
+ create_noop_path(root, joinrel, outerpath));
+ }
+}
+
+/*
* sort_inner_and_outer
* Create mergejoin join paths by explicitly sorting both the outer and
* inner join relations on each available merge ordering.