summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/geqo/geqo_eval.c
blob: 353ed1aa1ac2979fd5225669b661620a9d0e6d26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/*------------------------------------------------------------------------
 *
 * geqo_eval.c
 *	  Routines to evaluate query trees
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.93 2010/02/26 02:00:43 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

/* contributed by:
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
   *  Martin Utesch				 * Institute of Automatic Control	   *
   =							 = University of Mining and Technology =
   *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   *
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
 */

#include "postgres.h"

#include <float.h>
#include <limits.h>
#include <math.h>

#include "optimizer/geqo.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "utils/memutils.h"


/* A "clump" of already-joined relations within gimme_tree */
typedef struct
{
	RelOptInfo *joinrel;		/* joinrel for the set of relations */
	int			size;			/* number of input relations in clump */
} Clump;

static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
			bool force);
static bool desirable_join(PlannerInfo *root,
			   RelOptInfo *outer_rel, RelOptInfo *inner_rel);


/*
 * geqo_eval
 *
 * Returns cost of a query tree as an individual of the population.
 */
Cost
geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
{
	MemoryContext mycontext;
	MemoryContext oldcxt;
	RelOptInfo *joinrel;
	Cost		fitness;
	int			savelength;
	struct HTAB *savehash;

	/*
	 * Create a private memory context that will hold all temp storage
	 * allocated inside gimme_tree().
	 *
	 * Since geqo_eval() will be called many times, we can't afford to let all
	 * that memory go unreclaimed until end of statement.  Note we make the
	 * temp context a child of the planner's normal context, so that it will
	 * be freed even if we abort via ereport(ERROR).
	 */
	mycontext = AllocSetContextCreate(CurrentMemoryContext,
									  "GEQO",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);
	oldcxt = MemoryContextSwitchTo(mycontext);

	/*
	 * gimme_tree will add entries to root->join_rel_list, which may or may
	 * not already contain some entries.  The newly added entries will be
	 * recycled by the MemoryContextDelete below, so we must ensure that the
	 * list is restored to its former state before exiting.  We can do this by
	 * truncating the list to its original length.	NOTE this assumes that any
	 * added entries are appended at the end!
	 *
	 * We also must take care not to mess up the outer join_rel_hash, if there
	 * is one.	We can do this by just temporarily setting the link to NULL.
	 * (If we are dealing with enough join rels, which we very likely are, a
	 * new hash table will get built and used locally.)
	 *
	 * join_rel_level[] shouldn't be in use, so just Assert it isn't.
	 */
	savelength = list_length(root->join_rel_list);
	savehash = root->join_rel_hash;
	Assert(root->join_rel_level == NULL);

	root->join_rel_hash = NULL;

	/* construct the best path for the given combination of relations */
	joinrel = gimme_tree(root, tour, num_gene);

	/*
	 * compute fitness
	 *
	 * XXX geqo does not currently support optimization for partial result
	 * retrieval --- how to fix?
	 */
	fitness = joinrel->cheapest_total_path->total_cost;

	/*
	 * Restore join_rel_list to its former state, and put back original
	 * hashtable if any.
	 */
	root->join_rel_list = list_truncate(root->join_rel_list,
										savelength);
	root->join_rel_hash = savehash;

	/* release all the memory acquired within gimme_tree */
	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(mycontext);

	return fitness;
}

/*
 * gimme_tree
 *	  Form planner estimates for a join tree constructed in the specified
 *	  order.
 *
 *	 'tour' is the proposed join order, of length 'num_gene'
 *
 * Returns a new join relation whose cheapest path is the best plan for
 * this join order.
 *
 * The original implementation of this routine always joined in the specified
 * order, and so could only build left-sided plans (and right-sided and
 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
 * It could never produce a "bushy" plan.  This had a couple of big problems,
 * of which the worst was that there are situations involving join order
 * restrictions where the only valid plans are bushy.
 *
 * The present implementation takes the given tour as a guideline, but
 * postpones joins that are illegal or seem unsuitable according to some
 * heuristic rules.  This allows correct bushy plans to be generated at need,
 * and as a nice side-effect it seems to materially improve the quality of the
 * generated plans.
 */
RelOptInfo *
gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
{
	GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
	List	   *clumps;
	int			rel_count;

	/*
	 * Sometimes, a relation can't yet be joined to others due to heuristics
	 * or actual semantic restrictions.  We maintain a list of "clumps" of
	 * successfully joined relations, with larger clumps at the front. Each
	 * new relation from the tour is added to the first clump it can be joined
	 * to; if there is none then it becomes a new clump of its own. When we
	 * enlarge an existing clump we check to see if it can now be merged with
	 * any other clumps.  After the tour is all scanned, we forget about the
	 * heuristics and try to forcibly join any remaining clumps.  Some forced
	 * joins might still fail due to semantics, but we should always be able
	 * to find some join order that works.
	 */
	clumps = NIL;

	for (rel_count = 0; rel_count < num_gene; rel_count++)
	{
		int			cur_rel_index;
		RelOptInfo *cur_rel;
		Clump	   *cur_clump;

		/* Get the next input relation */
		cur_rel_index = (int) tour[rel_count];
		cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
										  cur_rel_index - 1);

		/* Make it into a single-rel clump */
		cur_clump = (Clump *) palloc(sizeof(Clump));
		cur_clump->joinrel = cur_rel;
		cur_clump->size = 1;

		/* Merge it into the clumps list, using only desirable joins */
		clumps = merge_clump(root, clumps, cur_clump, false);
	}

	if (list_length(clumps) > 1)
	{
		/* Force-join the remaining clumps in some legal order */
		List	   *fclumps;
		ListCell   *lc;

		fclumps = NIL;
		foreach(lc, clumps)
		{
			Clump	   *clump = (Clump *) lfirst(lc);

			fclumps = merge_clump(root, fclumps, clump, true);
		}
		clumps = fclumps;
	}

	/* Did we succeed in forming a single join relation? */
	if (list_length(clumps) != 1)
		elog(ERROR, "failed to join all relations together");

	return ((Clump *) linitial(clumps))->joinrel;
}

/*
 * Merge a "clump" into the list of existing clumps for gimme_tree.
 *
 * We try to merge the clump into some existing clump, and repeat if
 * successful.	When no more merging is possible, insert the clump
 * into the list, preserving the list ordering rule (namely, that
 * clumps of larger size appear earlier).
 *
 * If force is true, merge anywhere a join is legal, even if it causes
 * a cartesian join to be performed.  When force is false, do only
 * "desirable" joins.
 */
static List *
merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
{
	ListCell   *prev;
	ListCell   *lc;

	/* Look for a clump that new_clump can join to */
	prev = NULL;
	foreach(lc, clumps)
	{
		Clump	   *old_clump = (Clump *) lfirst(lc);

		if (force ||
			desirable_join(root, old_clump->joinrel, new_clump->joinrel))
		{
			RelOptInfo *joinrel;

			/*
			 * Construct a RelOptInfo representing the join of these two input
			 * relations.  Note that we expect the joinrel not to exist in
			 * root->join_rel_list yet, and so the paths constructed for it
			 * will only include the ones we want.
			 */
			joinrel = make_join_rel(root,
									old_clump->joinrel,
									new_clump->joinrel);

			/* Keep searching if join order is not valid */
			if (joinrel)
			{
				/* Find and save the cheapest paths for this joinrel */
				set_cheapest(joinrel);

				/* Absorb new clump into old */
				old_clump->joinrel = joinrel;
				old_clump->size += new_clump->size;
				pfree(new_clump);

				/* Remove old_clump from list */
				clumps = list_delete_cell(clumps, lc, prev);

				/*
				 * Recursively try to merge the enlarged old_clump with
				 * others.	When no further merge is possible, we'll reinsert
				 * it into the list.
				 */
				return merge_clump(root, clumps, old_clump, force);
			}
		}
		prev = lc;
	}

	/*
	 * No merging is possible, so add new_clump as an independent clump, in
	 * proper order according to size.	We can be fast for the common case
	 * where it has size 1 --- it should always go at the end.
	 */
	if (clumps == NIL || new_clump->size == 1)
		return lappend(clumps, new_clump);

	/* Check if it belongs at the front */
	lc = list_head(clumps);
	if (new_clump->size > ((Clump *) lfirst(lc))->size)
		return lcons(new_clump, clumps);

	/* Else search for the place to insert it */
	for (;;)
	{
		ListCell   *nxt = lnext(lc);

		if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
			break;				/* it belongs after 'lc', before 'nxt' */
		lc = nxt;
	}
	lappend_cell(clumps, lc, new_clump);

	return clumps;
}

/*
 * Heuristics for gimme_tree: do we want to join these two relations?
 */
static bool
desirable_join(PlannerInfo *root,
			   RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
	/*
	 * Join if there is an applicable join clause, or if there is a join order
	 * restriction forcing these rels to be joined.
	 */
	if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
		have_join_order_restriction(root, outer_rel, inner_rel))
		return true;

	/* Otherwise postpone the join till later. */
	return false;
}