Improve planner's estimation of the space needed for HashAgg plans:

look at the actual aggregate transition datatypes and the actual overhead needed by nodeAgg.c, instead of using pessimistic round numbers. Per a discussion with Michael Tiemann.
author: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:36:33 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2005-01-28 19:36:33 +0000
commit: a098f533d15efe5552c0b6cd661cc123231c8c54 (patch)
tree: ebe55a8f91b584c66805ad50600647a1a756d2c4
parent: af5cd5ba92036f07f8414b15ec96da096ded8d7e (diff)
download: postgresql-a098f533d15efe5552c0b6cd661cc123231c8c54.tar.gz
5 files changed, 140 insertions, 70 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 97e30b7e58..d19b3f6518 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.126.4.1 2005/01/27 23:42:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.126.4.2 2005/01/28 19:35:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -605,6 +605,26 @@ build_hash_table(AggState *aggstate)
 }
 
 /*
+ * Estimate per-hash-table-entry overhead for the planner.
+ *
+ * Note that the estimate does not include space for pass-by-reference
+ * transition data values.
+ */
+Size
+hash_agg_entry_size(int numAggs)
+{
+	Size		entrysize;
+
+	/* This must match build_hash_table */
+	entrysize = sizeof(AggHashEntryData) +
+		(numAggs - 1) *sizeof(AggStatePerGroupData);
+	/* Account for hashtable overhead */
+	entrysize += 2 * sizeof(void *);
+	entrysize = MAXALIGN(entrysize);
+	return entrysize;
+}
+
+/*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.
  *
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3a19abbc4c..252f211689 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177.4.1 2005/01/28 19:36:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
 		Path	   *sorted_path;
 		double		dNumGroups = 0;
 		long		numGroups = 0;
-		int			numAggs = 0;
+		AggClauseCounts agg_counts;
 		int			numGroupCols = list_length(parse->groupClause);
 		bool		use_hashed_grouping = false;
 
+		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
 		tlist = preprocess_targetlist(tlist,
 									  parse->commandType,
@@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * the aggregate semantics (eg, producing only one output row).
 		 */
 		if (parse->hasAggs)
-			numAggs = count_agg_clause((Node *) tlist) +
-				count_agg_clause(parse->havingQual);
+		{
+			count_agg_clauses((Node *) tlist, &agg_counts);
+			count_agg_clauses(parse->havingQual, &agg_counts);
+		}
 
 		/*
 		 * Figure out whether we need a sorted result from query_planner.
@@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 */
 			if (!enable_hashagg || !hash_safe_grouping(parse))
 				use_hashed_grouping = false;
-			else if (parse->hasAggs &&
-					 (contain_distinct_agg_clause((Node *) tlist) ||
-					  contain_distinct_agg_clause(parse->havingQual)))
+			else if (agg_counts.numDistinctAggs != 0)
 				use_hashed_grouping = false;
 			else
 			{
@@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 				 * the need for sorted input is usually a win, the fact
 				 * that the output won't be sorted may be a loss; so we
 				 * need to do an actual cost comparison.
-				 *
-				 * In most cases we have no good way to estimate the size of
-				 * the transition value needed by an aggregate;
-				 * arbitrarily assume it is 100 bytes.	Also set the
-				 * overhead per hashtable entry at 64 bytes.
 				 */
-				int			hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+				Size		hashentrysize;
+
+				/* Estimate per-hash-entry space at tuple width... */
+				hashentrysize = cheapest_path_width;
+				/* plus space for pass-by-ref transition values... */
+				hashentrysize += agg_counts.transitionSpace;
+				/* plus the per-hash-entry overhead */
+				hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
 
 				if (hashentrysize * dNumGroups <= work_mem * 1024L)
 				{
@@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					Path		sorted_p;
 
 					cost_agg(&hashed_p, parse,
-							 AGG_HASHED, numAggs,
+							 AGG_HASHED, agg_counts.numAggs,
 							 numGroupCols, dNumGroups,
 							 cheapest_path->startup_cost,
 							 cheapest_path->total_cost,
@@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					}
 					if (parse->hasAggs)
 						cost_agg(&sorted_p, parse,
-								 AGG_SORTED, numAggs,
+								 AGG_SORTED, agg_counts.numAggs,
 								 numGroupCols, dNumGroups,
 								 sorted_p.startup_cost,
 								 sorted_p.total_cost,
@@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 			/* Hashed aggregation produces randomly-ordered results */
 			current_pathkeys = NIL;
@@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 		}
 		else
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index c580759b0e..c7219c3ec8 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.186 2004/12/31 22:00:23 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.186.4.1 2005/01/28 19:36:14 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -19,6 +19,7 @@
 
 #include "postgres.h"
 
+#include "catalog/pg_aggregate.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
@@ -33,6 +34,7 @@
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
 #include "parser/parse_expr.h"
 #include "tcop/tcopprot.h"
 #include "utils/acl.h"
@@ -58,8 +60,7 @@ typedef struct
 } substitute_actual_parameters_context;
 
 static bool contain_agg_clause_walker(Node *node, void *context);
-static bool contain_distinct_agg_clause_walker(Node *node, void *context);
-static bool count_agg_clause_walker(Node *node, int *count);
+static bool count_agg_clauses_walker(Node *node, AggClauseCounts *counts);
 static bool expression_returns_set_walker(Node *node, void *context);
 static bool contain_subplans_walker(Node *node, void *context);
 static bool contain_mutable_functions_walker(Node *node, void *context);
@@ -358,71 +359,108 @@ contain_agg_clause_walker(Node *node, void *context)
 }
 
 /*
- * contain_distinct_agg_clause
- *	  Recursively search for DISTINCT Aggref nodes within a clause.
+ * count_agg_clauses
+ *	  Recursively count the Aggref nodes in an expression tree.
+ *
+ *	  Note: this also checks for nested aggregates, which are an error.
  *
- *	  Returns true if any DISTINCT aggregate found.
+ * We not only count the nodes, but attempt to estimate the total space
+ * needed for their transition state values if all are evaluated in parallel
+ * (as would be done in a HashAgg plan).  See AggClauseCounts for the exact
+ * set of statistics returned.
+ *
+ * NOTE that the counts are ADDED to those already in *counts ... so the
+ * caller is responsible for zeroing the struct initially.
  *
  * This does not descend into subqueries, and so should be used only after
  * reduction of sublinks to subplans, or in contexts where it's known there
  * are no subqueries.  There mustn't be outer-aggregate references either.
  */
-bool
-contain_distinct_agg_clause(Node *clause)
+void
+count_agg_clauses(Node *clause, AggClauseCounts *counts)
 {
-	return contain_distinct_agg_clause_walker(clause, NULL);
+	/* no setup needed */
+	count_agg_clauses_walker(clause, counts);
 }
 
 static bool
-contain_distinct_agg_clause_walker(Node *node, void *context)
+count_agg_clauses_walker(Node *node, AggClauseCounts *counts)
 {
 	if (node == NULL)
 		return false;
 	if (IsA(node, Aggref))
 	{
-		Assert(((Aggref *) node)->agglevelsup == 0);
-		if (((Aggref *) node)->aggdistinct)
-			return true;		/* abort the tree traversal and return
-								 * true */
-	}
-	Assert(!IsA(node, SubLink));
-	return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
-}
+		Aggref	   *aggref = (Aggref *) node;
+		Oid			inputType;
+		HeapTuple	aggTuple;
+		Form_pg_aggregate aggform;
+		Oid			aggtranstype;
+
+		Assert(aggref->agglevelsup == 0);
+		counts->numAggs++;
+		if (aggref->aggdistinct)
+			counts->numDistinctAggs++;
+
+		inputType = exprType((Node *) aggref->target);
+
+		/* fetch aggregate transition datatype from pg_aggregate */
+		aggTuple = SearchSysCache(AGGFNOID,
+								  ObjectIdGetDatum(aggref->aggfnoid),
+								  0, 0, 0);
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u",
+				 aggref->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+		aggtranstype = aggform->aggtranstype;
+		ReleaseSysCache(aggTuple);
+
+		/* resolve actual type of transition state, if polymorphic */
+		if (aggtranstype == ANYARRAYOID || aggtranstype == ANYELEMENTOID)
+		{
+			/* have to fetch the agg's declared input type... */
+			Oid			agg_arg_types[FUNC_MAX_ARGS];
+			int			agg_nargs;
+
+			(void) get_func_signature(aggref->aggfnoid,
+									  agg_arg_types, &agg_nargs);
+			Assert(agg_nargs == 1);
+			aggtranstype = resolve_generic_type(aggtranstype,
+												inputType,
+												agg_arg_types[0]);
+		}
 
-/*
- * count_agg_clause
- *	  Recursively count the Aggref nodes in an expression tree.
- *
- *	  Note: this also checks for nested aggregates, which are an error.
- *
- * This does not descend into subqueries, and so should be used only after
- * reduction of sublinks to subplans, or in contexts where it's known there
- * are no subqueries.  There mustn't be outer-aggregate references either.
- */
-int
-count_agg_clause(Node *clause)
-{
-	int			result = 0;
+		/*
+		 * If the transition type is pass-by-value then it doesn't add
+		 * anything to the required size of the hashtable.  If it is
+		 * pass-by-reference then we have to add the estimated size of
+		 * the value itself, plus palloc overhead.
+		 */
+		if (!get_typbyval(aggtranstype))
+		{
+			int32		aggtranstypmod;
+			int32		avgwidth;
 
-	count_agg_clause_walker(clause, &result);
-	return result;
-}
+			/*
+			 * If transition state is of same type as input, assume it's the
+			 * same typmod (same width) as well.  This works for cases like
+			 * MAX/MIN and is probably somewhat reasonable otherwise.
+			 */
+			if (aggtranstype == inputType)
+				aggtranstypmod = exprTypmod((Node *) aggref->target);
+			else
+				aggtranstypmod = -1;
 
-static bool
-count_agg_clause_walker(Node *node, int *count)
-{
-	if (node == NULL)
-		return false;
-	if (IsA(node, Aggref))
-	{
-		Assert(((Aggref *) node)->agglevelsup == 0);
-		(*count)++;
+			avgwidth = get_typavgwidth(aggtranstype, aggtranstypmod);
+			avgwidth = MAXALIGN(avgwidth);
+
+			counts->transitionSpace += avgwidth + 2 * sizeof(void *);
+		}
 
 		/*
 		 * Complain if the aggregate's argument contains any aggregates;
 		 * nested agg functions are semantically nonsensical.
 		 */
-		if (contain_agg_clause((Node *) ((Aggref *) node)->target))
+		if (contain_agg_clause((Node *) aggref->target))
 			ereport(ERROR,
 					(errcode(ERRCODE_GROUPING_ERROR),
 				  errmsg("aggregate function calls may not be nested")));
@@ -433,8 +471,8 @@ count_agg_clause_walker(Node *node, int *count)
 		return false;
 	}
 	Assert(!IsA(node, SubLink));
-	return expression_tree_walker(node, count_agg_clause_walker,
-								  (void *) count);
+	return expression_tree_walker(node, count_agg_clauses_walker,
+								  (void *) counts);
 }
 
 
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 32449c22aa..cb9631ea26 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/nodeAgg.h,v 1.23 2004/12/31 22:03:29 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeAgg.h,v 1.23.4.1 2005/01/28 19:36:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,6 +23,8 @@ extern TupleTableSlot *ExecAgg(AggState *node);
 extern void ExecEndAgg(AggState *node);
 extern void ExecReScanAgg(AggState *node, ExprContext *exprCtxt);
 
+extern Size hash_agg_entry_size(int numAggs);
+
 extern Datum aggregate_dummy(PG_FUNCTION_ARGS);
 
 #endif   /* NODEAGG_H */
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 368c69f68b..a3ea447746 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.77 2004/12/31 22:03:36 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.77.4.1 2005/01/28 19:36:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,11 +17,17 @@
 #include "nodes/relation.h"
 
 
-
 #define is_opclause(clause)		((clause) != NULL && IsA(clause, OpExpr))
 #define is_funcclause(clause)	((clause) != NULL && IsA(clause, FuncExpr))
 #define is_subplan(clause)		((clause) != NULL && IsA(clause, SubPlan))
 
+typedef struct
+{
+	int			numAggs;			/* total number of aggregate calls */
+	int			numDistinctAggs;	/* number that use DISTINCT */
+	Size		transitionSpace;	/* for pass-by-ref transition data */
+} AggClauseCounts;
+
 
 extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset,
 			  Expr *leftop, Expr *rightop);
@@ -42,8 +48,7 @@ extern Expr *make_ands_explicit(List *andclauses);
 extern List *make_ands_implicit(Expr *clause);
 
 extern bool contain_agg_clause(Node *clause);
-extern bool contain_distinct_agg_clause(Node *clause);
-extern int	count_agg_clause(Node *clause);
+extern void count_agg_clauses(Node *clause, AggClauseCounts *counts);
 
 extern bool expression_returns_set(Node *clause);
author	Tom Lane <tgl@sss.pgh.pa.us>	2005-01-28 19:36:33 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2005-01-28 19:36:33 +0000
commit	a098f533d15efe5552c0b6cd661cc123231c8c54 (patch)
tree	ebe55a8f91b584c66805ad50600647a1a756d2c4
parent	af5cd5ba92036f07f8414b15ec96da096ded8d7e (diff)
download	postgresql-a098f533d15efe5552c0b6cd661cc123231c8c54.tar.gz