Build out the planner support function infrastructure.

Add support function requests for estimating the selectivity, cost, and number of result rows (if a SRF) of the target function. The lack of a way to estimate selectivity of a boolean-returning function in WHERE has been a recognized deficiency of the planner since Berkeley days. This commit finally fixes it. In addition, non-constant estimates of cost and number of output rows are now possible. We still fall back to looking at procost and prorows if the support function doesn't service the request, of course. To make concrete use of the possibility of estimating output rowcount for SRFs, this commit adds support functions for array_unnest(anyarray) and the integer variants of generate_series; the lack of plausible rowcount estimates for those, even when it's obvious to a human, has been a repeated subject of complaints. Obviously, much more could now be done in this line, but I'm mostly just trying to get the infrastructure in place. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
author: Tom Lane <tgl@sss.pgh.pa.us> 2019-02-09 18:32:23 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2019-02-09 18:32:23 -0500
commit: a391ff3c3d418e404a2c6e4ff0865a107752827b (patch)
tree: f076c5785d06e7ccf082b08c1651b17d95af4563 /src/backend/optimizer/util
parent: 1fb57af92069ee104c09e2016af9e0e620681be3 (diff)
download: postgresql-a391ff3c3d418e404a2c6e4ff0865a107752827b.tar.gz
3 files changed, 199 insertions, 12 deletions
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 002c29a5f5..86e4753a5b 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -36,8 +36,8 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
+#include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
-#include "optimizer/prep.h"
 #include "parser/analyze.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
@@ -343,19 +343,24 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
 		if (DO_AGGSPLIT_COMBINE(context->aggsplit))
 		{
 			/* charge for combining previously aggregated states */
-			costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
+			add_function_cost(context->root, aggcombinefn, NULL,
+							  &costs->transCost);
 		}
 		else
-			costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
+			add_function_cost(context->root, aggtransfn, NULL,
+							  &costs->transCost);
 		if (DO_AGGSPLIT_DESERIALIZE(context->aggsplit) &&
 			OidIsValid(aggdeserialfn))
-			costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
+			add_function_cost(context->root, aggdeserialfn, NULL,
+							  &costs->transCost);
 		if (DO_AGGSPLIT_SERIALIZE(context->aggsplit) &&
 			OidIsValid(aggserialfn))
-			costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
+			add_function_cost(context->root, aggserialfn, NULL,
+							  &costs->finalCost);
 		if (!DO_AGGSPLIT_SKIPFINAL(context->aggsplit) &&
 			OidIsValid(aggfinalfn))
-			costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
+			add_function_cost(context->root, aggfinalfn, NULL,
+							  &costs->finalCost);
 
 		/*
 		 * These costs are incurred only by the initial aggregate node, so we
@@ -392,8 +397,8 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context)
 		{
 			cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs,
 								context->root);
-			costs->transCost.startup += argcosts.startup;
-			costs->finalCost += argcosts.per_tuple;
+			costs->finalCost.startup += argcosts.startup;
+			costs->finalCost.per_tuple += argcosts.per_tuple;
 		}
 
 		/*
@@ -561,7 +566,7 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
  * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
  */
 double
-expression_returns_set_rows(Node *clause)
+expression_returns_set_rows(PlannerInfo *root, Node *clause)
 {
 	if (clause == NULL)
 		return 1.0;
@@ -570,7 +575,7 @@ expression_returns_set_rows(Node *clause)
 		FuncExpr   *expr = (FuncExpr *) clause;
 
 		if (expr->funcretset)
-			return clamp_row_est(get_func_rows(expr->funcid));
+			return clamp_row_est(get_function_rows(root, expr->funcid, clause));
 	}
 	if (IsA(clause, OpExpr))
 	{
@@ -579,7 +584,7 @@ expression_returns_set_rows(Node *clause)
 		if (expr->opretset)
 		{
 			set_opfuncid(expr);
-			return clamp_row_est(get_func_rows(expr->opfuncid));
+			return clamp_row_est(get_function_rows(root, expr->opfuncid, clause));
 		}
 	}
 	return 1.0;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index a3e64110d3..169e51e792 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2711,7 +2711,7 @@ create_set_projection_path(PlannerInfo *root,
 		Node	   *node = (Node *) lfirst(lc);
 		double		itemrows;
 
-		itemrows = expression_returns_set_rows(node);
+		itemrows = expression_returns_set_rows(root, node);
 		if (tlist_rows < itemrows)
 			tlist_rows = itemrows;
 	}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 3efa1bdc1a..d6dc83ca80 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -29,10 +29,12 @@
 #include "catalog/heap.h"
 #include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
 #include "catalog/pg_statistic_ext.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
@@ -1772,6 +1774,8 @@ restriction_selectivity(PlannerInfo *root,
  * Returns the selectivity of a specified join operator clause.
  * This code executes registered procedures stored in the
  * operator relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
  */
 Selectivity
 join_selectivity(PlannerInfo *root,
@@ -1806,6 +1810,184 @@ join_selectivity(PlannerInfo *root,
 }
 
 /*
+ * function_selectivity
+ *
+ * Returns the selectivity of a specified boolean function clause.
+ * This code executes registered procedures stored in the
+ * pg_proc relation, by calling the function manager.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ */
+Selectivity
+function_selectivity(PlannerInfo *root,
+					 Oid funcid,
+					 List *args,
+					 Oid inputcollid,
+					 bool is_join,
+					 int varRelid,
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo)
+{
+	RegProcedure prosupport = get_func_support(funcid);
+	SupportRequestSelectivity req;
+	SupportRequestSelectivity *sresult;
+
+	/*
+	 * If no support function is provided, use our historical default
+	 * estimate, 0.3333333.  This seems a pretty unprincipled choice, but
+	 * Postgres has been using that estimate for function calls since 1992.
+	 * The hoariness of this behavior suggests that we should not be in too
+	 * much hurry to use another value.
+	 */
+	if (!prosupport)
+		return (Selectivity) 0.3333333;
+
+	req.type = T_SupportRequestSelectivity;
+	req.root = root;
+	req.funcid = funcid;
+	req.args = args;
+	req.inputcollid = inputcollid;
+	req.is_join = is_join;
+	req.varRelid = varRelid;
+	req.jointype = jointype;
+	req.sjinfo = sjinfo;
+	req.selectivity = -1;		/* to catch failure to set the value */
+
+	sresult = (SupportRequestSelectivity *)
+		DatumGetPointer(OidFunctionCall1(prosupport,
+										 PointerGetDatum(&req)));
+
+	/* If support function fails, use default */
+	if (sresult != &req)
+		return (Selectivity) 0.3333333;
+
+	if (req.selectivity < 0.0 || req.selectivity > 1.0)
+		elog(ERROR, "invalid function selectivity: %f", req.selectivity);
+
+	return (Selectivity) req.selectivity;
+}
+
+/*
+ * add_function_cost
+ *
+ * Get an estimate of the execution cost of a function, and *add* it to
+ * the contents of *cost.  The estimate may include both one-time and
+ * per-tuple components, since QualCost does.
+ *
+ * The funcid must always be supplied.  If it is being called as the
+ * implementation of a specific parsetree node (FuncExpr, OpExpr,
+ * WindowFunc, etc), pass that as "node", else pass NULL.
+ *
+ * In some usages root might be NULL, too.
+ */
+void
+add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+				  QualCost *cost)
+{
+	HeapTuple	proctup;
+	Form_pg_proc procform;
+
+	proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+	if (!HeapTupleIsValid(proctup))
+		elog(ERROR, "cache lookup failed for function %u", funcid);
+	procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+	if (OidIsValid(procform->prosupport))
+	{
+		SupportRequestCost req;
+		SupportRequestCost *sresult;
+
+		req.type = T_SupportRequestCost;
+		req.root = root;
+		req.funcid = funcid;
+		req.node = node;
+
+		/* Initialize cost fields so that support function doesn't have to */
+		req.startup = 0;
+		req.per_tuple = 0;
+
+		sresult = (SupportRequestCost *)
+			DatumGetPointer(OidFunctionCall1(procform->prosupport,
+											 PointerGetDatum(&req)));
+
+		if (sresult == &req)
+		{
+			/* Success, so accumulate support function's estimate into *cost */
+			cost->startup += req.startup;
+			cost->per_tuple += req.per_tuple;
+			ReleaseSysCache(proctup);
+			return;
+		}
+	}
+
+	/* No support function, or it failed, so rely on procost */
+	cost->per_tuple += procform->procost * cpu_operator_cost;
+
+	ReleaseSysCache(proctup);
+}
+
+/*
+ * get_function_rows
+ *
+ * Get an estimate of the number of rows returned by a set-returning function.
+ *
+ * The funcid must always be supplied.  In current usage, the calling node
+ * will always be supplied, and will be either a FuncExpr or OpExpr.
+ * But it's a good idea to not fail if it's NULL.
+ *
+ * In some usages root might be NULL, too.
+ *
+ * Note: this returns the unfiltered result of the support function, if any.
+ * It's usually a good idea to apply clamp_row_est() to the result, but we
+ * leave it to the caller to do so.
+ */
+double
+get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
+{
+	HeapTuple	proctup;
+	Form_pg_proc procform;
+	double		result;
+
+	proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+	if (!HeapTupleIsValid(proctup))
+		elog(ERROR, "cache lookup failed for function %u", funcid);
+	procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+	Assert(procform->proretset);	/* else caller error */
+
+	if (OidIsValid(procform->prosupport))
+	{
+		SupportRequestRows req;
+		SupportRequestRows *sresult;
+
+		req.type = T_SupportRequestRows;
+		req.root = root;
+		req.funcid = funcid;
+		req.node = node;
+
+		req.rows = 0;			/* just for sanity */
+
+		sresult = (SupportRequestRows *)
+			DatumGetPointer(OidFunctionCall1(procform->prosupport,
+											 PointerGetDatum(&req)));
+
+		if (sresult == &req)
+		{
+			/* Success */
+			ReleaseSysCache(proctup);
+			return req.rows;
+		}
+	}
+
+	/* No support function, or it failed, so rely on prorows */
+	result = procform->prorows;
+
+	ReleaseSysCache(proctup);
+
+	return result;
+}
+
+/*
  * has_unique_index
  *
  * Detect whether there is a unique index on the specified attribute
author	Tom Lane <tgl@sss.pgh.pa.us>	2019-02-09 18:32:23 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2019-02-09 18:32:23 -0500
commit	a391ff3c3d418e404a2c6e4ff0865a107752827b (patch)
tree	f076c5785d06e7ccf082b08c1651b17d95af4563 /src/backend/optimizer/util
parent	1fb57af92069ee104c09e2016af9e0e620681be3 (diff)
download	postgresql-a391ff3c3d418e404a2c6e4ff0865a107752827b.tar.gz