summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/orindxpath.c
blob: 739d6c0a8d53754c1572bab8a976748a3fd4a619 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*-------------------------------------------------------------------------
 *
 * orindxpath.c
 *	  Routines to find index paths that match a set of OR clauses
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.90 2009/06/11 14:48:59 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "optimizer/cost.h"
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"


/*----------
 * create_or_index_quals
 *	  Examine join OR-of-AND quals to see if any useful restriction OR
 *	  clauses can be extracted.  If so, add them to the query.
 *
 * Although a join clause must reference other relations overall,
 * an OR of ANDs clause might contain sub-clauses that reference just this
 * relation and can be used to build a restriction clause.
 * For example consider
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
 * We can transform this into
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
 *			AND (a.x = 42 OR a.x = 44)
 *			AND (b.y = 43 OR b.z = 45);
 * which opens the potential to build OR indexscans on a and b.  In essence
 * this is a partial transformation to CNF (AND of ORs format).  It is not
 * complete, however, because we do not unravel the original OR --- doing so
 * would usually bloat the qualification expression to little gain.
 *
 * The added quals are partially redundant with the original OR, and therefore
 * will cause the size of the joinrel to be underestimated when it is finally
 * formed.  (This would be true of a full transformation to CNF as well; the
 * fault is not really in the transformation, but in clauselist_selectivity's
 * inability to recognize redundant conditions.)  To minimize the collateral
 * damage, we want to minimize the number of quals added.  Therefore we do
 * not add every possible extracted restriction condition to the query.
 * Instead, we search for the single restriction condition that generates
 * the most useful (cheapest) OR indexscan, and add only that condition.
 * This is a pretty ad-hoc heuristic, but quite useful.
 *
 * We can then compensate for the redundancy of the added qual by poking
 * the recorded selectivity of the original OR clause, thereby ensuring
 * the added qual doesn't change the estimated size of the joinrel when
 * it is finally formed.  This is a MAJOR HACK: it depends on the fact
 * that clause selectivities are cached and on the fact that the same
 * RestrictInfo node will appear in every joininfo list that might be used
 * when the joinrel is formed.  And it probably isn't right in cases where
 * the size estimation is nonlinear (i.e., outer and IN joins).  But it
 * beats not doing anything.
 *
 * NOTE: one might think this messiness could be worked around by generating
 * the indexscan path with a small path->rows value, and not touching the
 * rel's baserestrictinfo or rel->rows.  However, that does not work.
 * The optimizer's fundamental design assumes that every general-purpose
 * Path for a given relation generates the same number of rows.  Without
 * this assumption we'd not be able to optimize solely on the cost of Paths,
 * but would have to take number of output rows into account as well.
 * (Perhaps someday that'd be worth doing, but it's a pretty big change...)
 *
 * 'rel' is the relation entry for which quals are to be created
 *
 * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
 * If no quals available, returns FALSE and doesn't change rel.
 *
 * Note: check_partial_indexes() must have been run previously.
 *----------
 */
bool
create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
{
	BitmapOrPath *bestpath = NULL;
	RestrictInfo *bestrinfo = NULL;
	List	   *newrinfos;
	RestrictInfo *or_rinfo;
	Selectivity or_selec,
				orig_selec;
	ListCell   *i;

	/*
	 * Find potentially interesting OR joinclauses.
	 *
	 * We must ignore clauses for which the target rel is in nullable_relids;
	 * that means there's an outer join below the clause and so it can't be
	 * enforced at the relation scan level.
	 *
	 * We must also ignore clauses that are marked !is_pushed_down (ie they
	 * are themselves outer-join clauses).  It would be safe to extract an
	 * index condition from such a clause if we are within the nullable rather
	 * than the non-nullable side of its join, but we haven't got enough
	 * context here to tell which applies.  OR clauses in outer-join quals
	 * aren't exactly common, so we'll let that case go unoptimized for now.
	 */
	foreach(i, rel->joininfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);

		if (restriction_is_or_clause(rinfo) &&
			rinfo->is_pushed_down &&
			!bms_is_member(rel->relid, rinfo->nullable_relids))
		{
			/*
			 * Use the generate_bitmap_or_paths() machinery to estimate the
			 * value of each OR clause.  We can use regular restriction
			 * clauses along with the OR clause contents to generate
			 * indexquals.  We pass outer_rel = NULL so that sub-clauses that
			 * are actually joins will be ignored.
			 */
			List	   *orpaths;
			ListCell   *k;

			orpaths = generate_bitmap_or_paths(root, rel,
											   list_make1(rinfo),
											   rel->baserestrictinfo,
											   NULL);

			/* Locate the cheapest OR path */
			foreach(k, orpaths)
			{
				BitmapOrPath *path = (BitmapOrPath *) lfirst(k);

				Assert(IsA(path, BitmapOrPath));
				if (bestpath == NULL ||
					path->path.total_cost < bestpath->path.total_cost)
				{
					bestpath = path;
					bestrinfo = rinfo;
				}
			}
		}
	}

	/* Fail if no suitable clauses found */
	if (bestpath == NULL)
		return false;

	/*
	 * Convert the path's indexclauses structure to a RestrictInfo tree. We
	 * include any partial-index predicates so as to get a reasonable
	 * representation of what the path is actually scanning.
	 */
	newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
												  true, true);

	/* It's possible we get back something other than a single OR clause */
	if (list_length(newrinfos) != 1)
		return false;
	or_rinfo = (RestrictInfo *) linitial(newrinfos);
	Assert(IsA(or_rinfo, RestrictInfo));
	if (!restriction_is_or_clause(or_rinfo))
		return false;

	/*
	 * OK, add it to the rel's restriction list.
	 */
	rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);

	/*
	 * Adjust the original OR clause's cached selectivity to compensate for
	 * the selectivity of the added (but redundant) lower-level qual. This
	 * should result in the join rel getting approximately the same rows
	 * estimate as it would have gotten without all these shenanigans. (XXX
	 * major hack alert ... this depends on the assumption that the
	 * selectivity will stay cached ...)
	 */
	or_selec = clause_selectivity(root, (Node *) or_rinfo,
								  0, JOIN_INNER, NULL);
	if (or_selec > 0 && or_selec < 1)
	{
		orig_selec = clause_selectivity(root, (Node *) bestrinfo,
										0, JOIN_INNER, NULL);
		bestrinfo->norm_selec = orig_selec / or_selec;
		/* clamp result to sane range */
		if (bestrinfo->norm_selec > 1)
			bestrinfo->norm_selec = 1;
		/* It isn't an outer join clause, so no need to adjust outer_selec */
	}

	/* Tell caller to recompute partial index status and rowcount estimate */
	return true;
}