summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util/orclauses.c
blob: ca8b5ff92fc17cac6bae5f49cd3cc3821ad046d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
/*-------------------------------------------------------------------------
 *
 * orclauses.c
 *	  Routines to extract restriction OR clauses from join OR clauses
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/util/orclauses.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/orclauses.h"
#include "optimizer/restrictinfo.h"


static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
								   Expr *orclause, RestrictInfo *join_or_rinfo);


/*
 * extract_restriction_or_clauses
 *	  Examine join OR-of-AND clauses to see if any useful restriction OR
 *	  clauses can be extracted.  If so, add them to the query.
 *
 * Although a join clause must reference multiple relations overall,
 * an OR of ANDs clause might contain sub-clauses that reference just one
 * relation and can be used to build a restriction clause for that rel.
 * For example consider
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
 * We can transform this into
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
 *			AND (a.x = 42 OR a.x = 44)
 *			AND (b.y = 43 OR b.z = 45);
 * which allows the latter clauses to be applied during the scans of a and b,
 * perhaps as index qualifications, and in any case reducing the number of
 * rows arriving at the join.  In essence this is a partial transformation to
 * CNF (AND of ORs format).  It is not complete, however, because we do not
 * unravel the original OR --- doing so would usually bloat the qualification
 * expression to little gain.
 *
 * The added quals are partially redundant with the original OR, and therefore
 * would cause the size of the joinrel to be underestimated when it is finally
 * formed.  (This would be true of a full transformation to CNF as well; the
 * fault is not really in the transformation, but in clauselist_selectivity's
 * inability to recognize redundant conditions.)  We can compensate for this
 * redundancy by changing the cached selectivity of the original OR clause,
 * canceling out the (valid) reduction in the estimated sizes of the base
 * relations so that the estimated joinrel size remains the same.  This is
 * a MAJOR HACK: it depends on the fact that clause selectivities are cached
 * and on the fact that the same RestrictInfo node will appear in every
 * joininfo list that might be used when the joinrel is formed.
 * And it doesn't work in cases where the size estimation is nonlinear
 * (i.e., outer and IN joins).  But it beats not doing anything.
 *
 * We examine each base relation to see if join clauses associated with it
 * contain extractable restriction conditions.  If so, add those conditions
 * to the rel's baserestrictinfo and update the cached selectivities of the
 * join clauses.  Note that the same join clause will be examined afresh
 * from the point of view of each baserel that participates in it, so its
 * cached selectivity may get updated multiple times.
 */
void
extract_restriction_or_clauses(PlannerInfo *root)
{
	Index		rti;

	/* Examine each baserel for potential join OR clauses */
	for (rti = 1; rti < root->simple_rel_array_size; rti++)
	{
		RelOptInfo *rel = root->simple_rel_array[rti];
		ListCell   *lc;

		/* there may be empty slots corresponding to non-baserel RTEs */
		if (rel == NULL)
			continue;

		Assert(rel->relid == rti);	/* sanity check on array */

		/* ignore RTEs that are "other rels" */
		if (rel->reloptkind != RELOPT_BASEREL)
			continue;

		/*
		 * Find potentially interesting OR joinclauses.  We can use any
		 * joinclause that is considered safe to move to this rel by the
		 * parameterized-path machinery, even though what we are going to do
		 * with it is not exactly a parameterized path.
		 */
		foreach(lc, rel->joininfo)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);

			if (restriction_is_or_clause(rinfo) &&
				join_clause_is_movable_to(rinfo, rel))
			{
				/* Try to extract a qual for this rel only */
				Expr	   *orclause = extract_or_clause(rinfo, rel);

				/*
				 * If successful, decide whether we want to use the clause,
				 * and insert it into the rel's restrictinfo list if so.
				 */
				if (orclause)
					consider_new_or_clause(root, rel, orclause, rinfo);
			}
		}
	}
}

/*
 * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
 */
static bool
is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
{
	/*
	 * We want clauses that mention the rel, and only the rel.  So in
	 * particular pseudoconstant clauses can be rejected quickly.  Then check
	 * the clause's Var membership.
	 */
	if (rinfo->pseudoconstant)
		return false;
	if (!bms_equal(rinfo->clause_relids, rel->relids))
		return false;

	/* We don't want extra evaluations of any volatile functions */
	if (contain_volatile_functions((Node *) rinfo->clause))
		return false;

	return true;
}

/*
 * Try to extract a restriction clause mentioning only "rel" from the given
 * join OR-clause.
 *
 * We must be able to extract at least one qual for this rel from each of
 * the arms of the OR, else we can't use it.
 *
 * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
 * if no OR clause could be extracted.
 */
static Expr *
extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
{
	List	   *clauselist = NIL;
	ListCell   *lc;

	/*
	 * Scan each arm of the input OR clause.  Notice we descend into
	 * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
	 * toplevel OR/AND structure.  This is useful because we can use the info
	 * in those nodes to make is_safe_restriction_clause_for()'s checks
	 * cheaper.  We'll strip those nodes from the returned tree, though,
	 * meaning that fresh ones will be built if the clause is accepted as a
	 * restriction clause.  This might seem wasteful --- couldn't we re-use
	 * the existing RestrictInfos?	But that'd require assuming that
	 * selectivity and other cached data is computed exactly the same way for
	 * a restriction clause as for a join clause, which seems undesirable.
	 */
	Assert(is_orclause(or_rinfo->orclause));
	foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
	{
		Node	   *orarg = (Node *) lfirst(lc);
		List	   *subclauses = NIL;
		Node	   *subclause;

		/* OR arguments should be ANDs or sub-RestrictInfos */
		if (is_andclause(orarg))
		{
			List	   *andargs = ((BoolExpr *) orarg)->args;
			ListCell   *lc2;

			foreach(lc2, andargs)
			{
				RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);

				if (restriction_is_or_clause(rinfo))
				{
					/*
					 * Recurse to deal with nested OR.  Note we *must* recurse
					 * here, this isn't just overly-tense optimization: we
					 * have to descend far enough to find and strip all
					 * RestrictInfos in the expression.
					 */
					Expr	   *suborclause;

					suborclause = extract_or_clause(rinfo, rel);
					if (suborclause)
						subclauses = lappend(subclauses, suborclause);
				}
				else if (is_safe_restriction_clause_for(rinfo, rel))
					subclauses = lappend(subclauses, rinfo->clause);
			}
		}
		else
		{
			RestrictInfo *rinfo = castNode(RestrictInfo, orarg);

			Assert(!restriction_is_or_clause(rinfo));
			if (is_safe_restriction_clause_for(rinfo, rel))
				subclauses = lappend(subclauses, rinfo->clause);
		}

		/*
		 * If nothing could be extracted from this arm, we can't do anything
		 * with this OR clause.
		 */
		if (subclauses == NIL)
			return NULL;

		/*
		 * OK, add subclause(s) to the result OR.  If we found more than one,
		 * we need an AND node.  But if we found only one, and it is itself an
		 * OR node, add its subclauses to the result instead; this is needed
		 * to preserve AND/OR flatness (ie, no OR directly underneath OR).
		 */
		subclause = (Node *) make_ands_explicit(subclauses);
		if (is_orclause(subclause))
			clauselist = list_concat(clauselist,
									 ((BoolExpr *) subclause)->args);
		else
			clauselist = lappend(clauselist, subclause);
	}

	/*
	 * If we got a restriction clause from every arm, wrap them up in an OR
	 * node.  (In theory the OR node might be unnecessary, if there was only
	 * one arm --- but then the input OR node was also redundant.)
	 */
	if (clauselist != NIL)
		return make_orclause(clauselist);
	return NULL;
}

/*
 * Consider whether a successfully-extracted restriction OR clause is
 * actually worth using.  If so, add it to the planner's data structures,
 * and adjust the original join clause (join_or_rinfo) to compensate.
 */
static void
consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
					   Expr *orclause, RestrictInfo *join_or_rinfo)
{
	RestrictInfo *or_rinfo;
	Selectivity or_selec,
				orig_selec;

	/*
	 * Build a RestrictInfo from the new OR clause.  We can assume it's valid
	 * as a base restriction clause.
	 */
	or_rinfo = make_restrictinfo(root,
								 orclause,
								 true,
								 false,
								 join_or_rinfo->security_level,
								 NULL,
								 NULL);

	/*
	 * Estimate its selectivity.  (We could have done this earlier, but doing
	 * it on the RestrictInfo representation allows the result to get cached,
	 * saving work later.)
	 */
	or_selec = clause_selectivity(root, (Node *) or_rinfo,
								  0, JOIN_INNER, NULL);

	/*
	 * The clause is only worth adding to the query if it rejects a useful
	 * fraction of the base relation's rows; otherwise, it's just going to
	 * cause duplicate computation (since we will still have to check the
	 * original OR clause when the join is formed).  Somewhat arbitrarily, we
	 * set the selectivity threshold at 0.9.
	 */
	if (or_selec > 0.9)
		return;					/* forget it */

	/*
	 * OK, add it to the rel's restriction-clause list.
	 */
	rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
	rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
										 or_rinfo->security_level);

	/*
	 * Adjust the original join OR clause's cached selectivity to compensate
	 * for the selectivity of the added (but redundant) lower-level qual. This
	 * should result in the join rel getting approximately the same rows
	 * estimate as it would have gotten without all these shenanigans.
	 *
	 * XXX major hack alert: this depends on the assumption that the
	 * selectivity will stay cached.
	 *
	 * XXX another major hack: we adjust only norm_selec, the cached
	 * selectivity for JOIN_INNER semantics, even though the join clause
	 * might've been an outer-join clause.  This is partly because we can't
	 * easily identify the relevant SpecialJoinInfo here, and partly because
	 * the linearity assumption we're making would fail anyway.  (If it is an
	 * outer-join clause, "rel" must be on the nullable side, else we'd not
	 * have gotten here.  So the computation of the join size is going to be
	 * quite nonlinear with respect to the size of "rel", so it's not clear
	 * how we ought to adjust outer_selec even if we could compute its
	 * original value correctly.)
	 */
	if (or_selec > 0)
	{
		SpecialJoinInfo sjinfo;

		/*
		 * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
		 * approx_tuple_count() in costsize.c.)
		 */
		sjinfo.type = T_SpecialJoinInfo;
		sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
											 rel->relids);
		sjinfo.min_righthand = rel->relids;
		sjinfo.syn_lefthand = sjinfo.min_lefthand;
		sjinfo.syn_righthand = sjinfo.min_righthand;
		sjinfo.jointype = JOIN_INNER;
		sjinfo.ojrelid = 0;
		sjinfo.commute_above_l = NULL;
		sjinfo.commute_above_r = NULL;
		sjinfo.commute_below_l = NULL;
		sjinfo.commute_below_r = NULL;
		/* we don't bother trying to make the remaining fields valid */
		sjinfo.lhs_strict = false;
		sjinfo.semi_can_btree = false;
		sjinfo.semi_can_hash = false;
		sjinfo.semi_operators = NIL;
		sjinfo.semi_rhs_exprs = NIL;

		/* Compute inner-join size */
		orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
										0, JOIN_INNER, &sjinfo);

		/* And hack cached selectivity so join size remains the same */
		join_or_rinfo->norm_selec = orig_selec / or_selec;
		/* ensure result stays in sane range */
		if (join_or_rinfo->norm_selec > 1)
			join_or_rinfo->norm_selec = 1;
		/* as explained above, we don't touch outer_selec */
	}
}