summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2022-08-02 23:11:45 +1200
committerDavid Rowley <drowley@postgresql.org>2022-08-02 23:11:45 +1200
commit1349d2790bf48a4de072931c722f39337e72055e (patch)
tree3b525f30da6d37513522cdb5ea34ce14b653de87
parenta69959fab2f3633992b5cabec85acecbac6074c8 (diff)
downloadpostgresql-1349d2790bf48a4de072931c722f39337e72055e.tar.gz
Improve performance of ORDER BY / DISTINCT aggregates
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted tuples. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them once all the tuples for the group have arrived. Here we change the planner so it requests a path with a sort order which supports the most amount of ORDER BY / DISTINCT aggregate functions and add new code to the executor to allow it to support the processing of ORDER BY / DISTINCT aggregates where the tuples are already sorted in the correct order. Since there can be many ORDER BY / DISTINCT aggregates in any given query level, it's very possible that we can't find an order that suits all of these aggregates. The sort order that the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a} (we give precedence to aggregates which are earlier in the targetlist). SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires input ordered by {a}. Author: David Rowley Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com
-rw-r--r--contrib/postgres_fdw/expected/postgres_fdw.out32
-rw-r--r--contrib/postgres_fdw/sql/postgres_fdw.sql2
-rw-r--r--src/backend/executor/execExpr.c52
-rw-r--r--src/backend/executor/execExprInterp.c102
-rw-r--r--src/backend/executor/nodeAgg.c34
-rw-r--r--src/backend/jit/llvm/llvmjit_expr.c48
-rw-r--r--src/backend/jit/llvm/llvmjit_types.c2
-rw-r--r--src/backend/optimizer/path/pathkeys.c45
-rw-r--r--src/backend/optimizer/plan/planagg.c2
-rw-r--r--src/backend/optimizer/plan/planner.c310
-rw-r--r--src/backend/optimizer/prep/prepagg.c7
-rw-r--r--src/backend/parser/parse_expr.c1
-rw-r--r--src/backend/parser/parse_func.c1
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/executor/execExpr.h17
-rw-r--r--src/include/executor/nodeAgg.h8
-rw-r--r--src/include/nodes/pathnodes.h16
-rw-r--r--src/include/nodes/primnodes.h7
-rw-r--r--src/include/optimizer/paths.h4
-rw-r--r--src/test/regress/expected/aggregates.out80
-rw-r--r--src/test/regress/expected/partition_aggregate.out118
-rw-r--r--src/test/regress/expected/sqljson.out12
-rw-r--r--src/test/regress/expected/tuplesort.out42
-rw-r--r--src/test/regress/sql/aggregates.sql43
24 files changed, 849 insertions, 138 deletions
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ade797159d..52d0c3f3ed 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a
-- extension yet.
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
- QUERY PLAN
---------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
Group Key: ft2.c2
- -> Foreign Scan on public.ft2
- Output: c1, c2
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+ -> Sort
+ Output: c2, c1
+ Sort Key: ft2.c1 USING <^
+ -> Foreign Scan on public.ft2
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
-- This should not be pushed either.
explain (verbose, costs off)
@@ -3329,6 +3332,7 @@ alter extension postgres_fdw add operator public.=^(int, int);
alter extension postgres_fdw add operator public.>^(int, int);
alter server loopback options (set extensions 'postgres_fdw');
-- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
QUERY PLAN
@@ -3345,6 +3349,7 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
{6,16,26,36,46,56,66,76,86,96}
(1 row)
+alter server loopback options (drop fdw_tuple_cost);
-- This should be pushed too.
explain (verbose, costs off)
select * from ft2 order by c1 using operator(public.<^);
@@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw');
-- This will not be pushed as sort operator is now removed from the extension.
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
- QUERY PLAN
---------------------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
Group Key: ft2.c2
- -> Foreign Scan on public.ft2
- Output: c1, c2
- Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+ -> Sort
+ Output: c2, c1
+ Sort Key: ft2.c1 USING <^
+ -> Foreign Scan on public.ft2
+ Output: c2, c1
+ Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
-- Cleanup
drop operator class my_op_class using btree;
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index b7817c5a41..2a250ee632 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -943,9 +943,11 @@ alter extension postgres_fdw add operator public.>^(int, int);
alter server loopback options (set extensions 'postgres_fdw');
-- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
+alter server loopback options (drop fdw_tuple_cost);
-- This should be pushed too.
explain (verbose, costs off)
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index c8d7145fe3..d0a57c7aae 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3666,13 +3666,17 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
scratch.resnull = &state->resnull;
}
argno++;
+
+ Assert(pertrans->numInputs == argno);
}
- else if (pertrans->numSortCols == 0)
+ else if (!pertrans->aggsortrequired)
{
ListCell *arg;
/*
- * Normal transition function without ORDER BY / DISTINCT.
+ * Normal transition function without ORDER BY / DISTINCT or with
+ * ORDER BY / DISTINCT but the planner has given us pre-sorted
+ * input.
*/
strictargs = trans_fcinfo->args + 1;
@@ -3681,6 +3685,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
TargetEntry *source_tle = (TargetEntry *) lfirst(arg);
/*
+ * Don't initialize args for any ORDER BY clause that might
+ * exist in a presorted aggregate.
+ */
+ if (argno == pertrans->numTransInputs)
+ break;
+
+ /*
* Start from 1, since the 0th arg will be the transition
* value
*/
@@ -3689,11 +3700,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
&trans_fcinfo->args[argno + 1].isnull);
argno++;
}
+ Assert(pertrans->numTransInputs == argno);
}
else if (pertrans->numInputs == 1)
{
/*
- * DISTINCT and/or ORDER BY case, with a single column sorted on.
+ * Non-presorted DISTINCT and/or ORDER BY case, with a single
+ * column sorted on.
*/
TargetEntry *source_tle =
(TargetEntry *) linitial(pertrans->aggref->args);
@@ -3705,11 +3718,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
&state->resnull);
strictnulls = &state->resnull;
argno++;
+
+ Assert(pertrans->numInputs == argno);
}
else
{
/*
- * DISTINCT and/or ORDER BY case, with multiple columns sorted on.
+ * Non-presorted DISTINCT and/or ORDER BY case, with multiple
+ * columns sorted on.
*/
Datum *values = pertrans->sortslot->tts_values;
bool *nulls = pertrans->sortslot->tts_isnull;
@@ -3725,8 +3741,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
&values[argno], &nulls[argno]);
argno++;
}
+ Assert(pertrans->numInputs == argno);
}
- Assert(pertrans->numInputs == argno);
/*
* For a strict transfn, nothing happens when there's a NULL input; we
@@ -3748,6 +3764,21 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
state->steps_len - 1);
}
+ /* Handle DISTINCT aggregates which have pre-sorted input */
+ if (pertrans->numDistinctCols > 0 && !pertrans->aggsortrequired)
+ {
+ if (pertrans->numDistinctCols > 1)
+ scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_MULTI;
+ else
+ scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_SINGLE;
+
+ scratch.d.agg_presorted_distinctcheck.pertrans = pertrans;
+ scratch.d.agg_presorted_distinctcheck.jumpdistinct = -1; /* adjust later */
+ ExprEvalPushStep(state, &scratch);
+ adjust_bailout = lappend_int(adjust_bailout,
+ state->steps_len - 1);
+ }
+
/*
* Call transition function (once for each concurrently evaluated
* grouping set). Do so for both sort and hash based computations, as
@@ -3808,6 +3839,12 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
Assert(as->d.agg_deserialize.jumpnull == -1);
as->d.agg_deserialize.jumpnull = state->steps_len;
}
+ else if (as->opcode == EEOP_AGG_PRESORTED_DISTINCT_SINGLE ||
+ as->opcode == EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+ {
+ Assert(as->d.agg_presorted_distinctcheck.jumpdistinct == -1);
+ as->d.agg_presorted_distinctcheck.jumpdistinct = state->steps_len;
+ }
else
Assert(false);
}
@@ -3857,7 +3894,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
/*
* Determine appropriate transition implementation.
*
- * For non-ordered aggregates:
+ * For non-ordered aggregates and ORDER BY / DISTINCT aggregates with
+ * presorted input:
*
* If the initial value for the transition state doesn't exist in the
* pg_aggregate table then we will let the first non-NULL value returned
@@ -3887,7 +3925,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
* process_ordered_aggregate_{single, multi} and
* advance_transition_function.
*/
- if (pertrans->numSortCols == 0)
+ if (!pertrans->aggsortrequired)
{
if (pertrans->transtypeByVal)
{
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 723770fda0..636794ca6f 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -502,6 +502,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
&&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
&&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
&&CASE_EEOP_AGG_PLAIN_TRANS_BYREF,
+ &&CASE_EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+ &&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
&&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
&&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
&&CASE_EEOP_LAST
@@ -1786,6 +1788,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_NEXT();
}
+ EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_SINGLE)
+ {
+ AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+ AggState *aggstate = castNode(AggState, state->parent);
+
+ if (ExecEvalPreOrderedDistinctSingle(aggstate, pertrans))
+ EEO_NEXT();
+ else
+ EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+ }
+
+ EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+ {
+ AggState *aggstate = castNode(AggState, state->parent);
+ AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+
+ if (ExecEvalPreOrderedDistinctMulti(aggstate, pertrans))
+ EEO_NEXT();
+ else
+ EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+ }
+
/* process single-column ordered aggregate datum */
EEO_CASE(EEOP_AGG_ORDERED_TRANS_DATUM)
{
@@ -4403,6 +4427,84 @@ ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
}
/*
+ * ExecEvalPreOrderedDistinctSingle
+ * Returns true when the aggregate transition value Datum is distinct
+ * from the previous input Datum and returns false when the input Datum
+ * matches the previous input Datum.
+ */
+bool
+ExecEvalPreOrderedDistinctSingle(AggState *aggstate, AggStatePerTrans pertrans)
+{
+ Datum value = pertrans->transfn_fcinfo->args[1].value;
+ bool isnull = pertrans->transfn_fcinfo->args[1].isnull;
+
+ if (!pertrans->haslast ||
+ pertrans->lastisnull != isnull ||
+ !DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+ pertrans->aggCollation,
+ pertrans->lastdatum, value)))
+ {
+ if (pertrans->haslast && !pertrans->inputtypeByVal)
+ pfree(DatumGetPointer(pertrans->lastdatum));
+
+ pertrans->haslast = true;
+ if (!isnull)
+ {
+ MemoryContext oldContext;
+
+ oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
+
+ pertrans->lastdatum = datumCopy(value, pertrans->inputtypeByVal,
+ pertrans->inputtypeLen);
+
+ MemoryContextSwitchTo(oldContext);
+ }
+ else
+ pertrans->lastdatum = (Datum) 0;
+ pertrans->lastisnull = isnull;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * ExecEvalPreOrderedDistinctMulti
+ * Returns true when the aggregate input is distinct from the previous
+ * input and returns false when the input matches the previous input.
+ */
+bool
+ExecEvalPreOrderedDistinctMulti(AggState *aggstate, AggStatePerTrans pertrans)
+{
+ ExprContext *tmpcontext = aggstate->tmpcontext;
+
+ for (int i = 0; i < pertrans->numTransInputs; i++)
+ {
+ pertrans->sortslot->tts_values[i] = pertrans->transfn_fcinfo->args[i + 1].value;
+ pertrans->sortslot->tts_isnull[i] = pertrans->transfn_fcinfo->args[i + 1].isnull;
+ }
+
+ ExecClearTuple(pertrans->sortslot);
+ pertrans->sortslot->tts_nvalid = pertrans->numInputs;
+ ExecStoreVirtualTuple(pertrans->sortslot);
+
+ tmpcontext->ecxt_outertuple = pertrans->sortslot;
+ tmpcontext->ecxt_innertuple = pertrans->uniqslot;
+
+ if (!pertrans->haslast ||
+ !ExecQual(pertrans->equalfnMulti, tmpcontext))
+ {
+ if (pertrans->haslast)
+ ExecClearTuple(pertrans->uniqslot);
+
+ pertrans->haslast = true;
+ ExecCopySlot(pertrans->uniqslot, pertrans->sortslot);
+ return true;
+ }
+ return false;
+}
+
+/*
* Invoke ordered transition function, with a datum argument.
*/
void
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 2fc606cf29..96d200e446 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -583,7 +583,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
/*
* Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
*/
- if (pertrans->numSortCols > 0)
+ if (pertrans->aggsortrequired)
{
/*
* In case of rescan, maybe there could be an uncompleted sort
@@ -1309,7 +1309,7 @@ finalize_aggregates(AggState *aggstate,
pergroupstate = &pergroup[transno];
- if (pertrans->numSortCols > 0)
+ if (pertrans->aggsortrequired)
{
Assert(aggstate->aggstrategy != AGG_HASHED &&
aggstate->aggstrategy != AGG_MIXED);
@@ -1323,6 +1323,21 @@ finalize_aggregates(AggState *aggstate,
pertrans,
pergroupstate);
}
+ else if (pertrans->numDistinctCols > 0 && pertrans->haslast)
+ {
+ pertrans->haslast = false;
+
+ if (pertrans->numDistinctCols == 1)
+ {
+ if (!pertrans->inputtypeByVal && !pertrans->lastisnull)
+ pfree(DatumGetPointer(pertrans->lastdatum));
+
+ pertrans->lastisnull = false;
+ pertrans->lastdatum = (Datum) 0;
+ }
+ else
+ ExecClearTuple(pertrans->uniqslot);
+ }
}
/*
@@ -4127,6 +4142,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
* stick them into arrays. We ignore ORDER BY for an ordered-set agg,
* however; the agg's transfn and finalfn are responsible for that.
*
+ * When the planner has set the aggpresorted flag, the input to the
+ * aggregate is already correctly sorted. For ORDER BY aggregates we can
+ * simply treat these as normal aggregates. For presorted DISTINCT
+ * aggregates an extra step must be added to remove duplicate consecutive
+ * inputs.
+ *
* Note that by construction, if there is a DISTINCT clause then the ORDER
* BY clause is a prefix of it (see transformDistinctClause).
*/
@@ -4134,18 +4155,27 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
{
sortlist = NIL;
numSortCols = numDistinctCols = 0;
+ pertrans->aggsortrequired = false;
+ }
+ else if (aggref->aggpresorted && aggref->aggdistinct == NIL)
+ {
+ sortlist = NIL;
+ numSortCols = numDistinctCols = 0;
+ pertrans->aggsortrequired = false;
}
else if (aggref->aggdistinct)
{
sortlist = aggref->aggdistinct;
numSortCols = numDistinctCols = list_length(sortlist);
Assert(numSortCols >= list_length(aggref->aggorder));
+ pertrans->aggsortrequired = !aggref->aggpresorted;
}
else
{
sortlist = aggref->aggorder;
numSortCols = list_length(sortlist);
numDistinctCols = 0;
+ pertrans->aggsortrequired = (numSortCols > 0);
}
pertrans->numSortCols = numSortCols;
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index b6b6512ef1..bd3965143d 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -2335,6 +2335,54 @@ llvm_compile_expr(ExprState *state)
break;
}
+ case EEOP_AGG_PRESORTED_DISTINCT_SINGLE:
+ {
+ AggState *aggstate = castNode(AggState, state->parent);
+ AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+ int jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+ LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctSingle");
+ LLVMValueRef v_args[2];
+ LLVMValueRef v_ret;
+
+ v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+ v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+ v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+ v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+ LLVMBuildCondBr(b,
+ LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+ l_sbool_const(1), ""),
+ opblocks[opno + 1],
+ opblocks[jumpdistinct]);
+ break;
+ }
+
+ case EEOP_AGG_PRESORTED_DISTINCT_MULTI:
+ {
+ AggState *aggstate = castNode(AggState, state->parent);
+ AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+ int jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+ LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctMulti");
+ LLVMValueRef v_args[2];
+ LLVMValueRef v_ret;
+
+ v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+ v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+ v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+ v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+ LLVMBuildCondBr(b,
+ LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+ l_sbool_const(1), ""),
+ opblocks[opno + 1],
+ opblocks[jumpdistinct]);
+ break;
+ }
+
case EEOP_AGG_ORDERED_TRANS_DATUM:
build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum",
v_state, op, v_econtext);
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c
index b2bda86889..373471ad27 100644
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -103,6 +103,8 @@ void *referenced_functions[] =
{
ExecAggInitGroup,
ExecAggTransReparent,
+ ExecEvalPreOrderedDistinctSingle,
+ ExecEvalPreOrderedDistinctMulti,
ExecEvalAggOrderedTransDatum,
ExecEvalAggOrderedTransTuple,
ExecEvalArrayCoerce,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 2a1c923b4a..1fa7fc99b5 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -104,6 +104,28 @@ make_canonical_pathkey(PlannerInfo *root,
}
/*
+ * append_pathkeys
+ * Append all non-redundant PathKeys in 'source' onto 'target' and
+ * returns the updated 'target' list.
+ */
+List *
+append_pathkeys(List *target, List *source)
+{
+ ListCell *lc;
+
+ Assert(target != NIL);
+
+ foreach(lc, source)
+ {
+ PathKey *pk = lfirst_node(PathKey, lc);
+
+ if (!pathkey_is_redundant(pk, target))
+ target = lappend(target, pk);
+ }
+ return target;
+}
+
+/*
* pathkey_is_redundant
* Is a pathkey redundant with one already in the given list?
*
@@ -746,7 +768,8 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
List *
get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
List *path_pathkeys,
- List *group_pathkeys, List *group_clauses)
+ List *group_pathkeys, List *group_clauses,
+ List *aggregate_pathkeys)
{
Query *parse = root->parse;
List *infos = NIL;
@@ -758,7 +781,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
/* always return at least the original pathkeys/clauses */
info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
+ if (aggregate_pathkeys != NIL)
+ info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+ else
+ info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
@@ -783,7 +809,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
n_preordered))
{
info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
+ if (aggregate_pathkeys != NIL)
+ info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+ else
+ info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
@@ -822,7 +851,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
* still want to keep the keys reordered to path_pathkeys.
*/
info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
+ if (aggregate_pathkeys != NIL)
+ info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+ else
+ info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
@@ -850,7 +882,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
/* keep the group keys reordered to match ordering of input path */
info = makeNode(PathKeyInfo);
- info->pathkeys = pathkeys;
+ if (aggregate_pathkeys != NIL)
+ info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+ else
+ info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index e0e357960f..ab3f7abba1 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
foreach(lc, root->agginfos)
{
AggInfo *agginfo = lfirst_node(AggInfo, lc);
- Aggref *aggref = agginfo->representative_aggref;
+ Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
Oid aggsortop;
TargetEntry *curTarget;
MinMaxAggInfo *mminfo;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 06ad856eac..64632db73c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -24,6 +24,7 @@
#include "access/sysattr.h"
#include "access/table.h"
#include "access/xact.h"
+#include "catalog/pg_aggregate.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_proc.h"
@@ -3126,6 +3127,217 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
}
/*
+ * make_pathkeys_for_groupagg
+ * Determine the pathkeys for the GROUP BY clause and/or any ordered
+ * aggregates. We expect at least one of these here.
+ *
+ * Building the pathkeys for the GROUP BY is simple. Most of the complexity
+ * involved here comes from calculating the best pathkeys for ordered
+ * aggregates. We define "best" as the pathkeys that suit the most number of
+ * aggregate functions. We find these by looking at the first ORDER BY /
+ * DISTINCT aggregate and take the pathkeys for that before searching for
+ * other aggregates that require the same or a more strict variation of the
+ * same pathkeys. We then repeat that process for any remaining aggregates
+ * with different pathkeys and if we find another set of pathkeys that suits a
+ * larger number of aggregates then we return those pathkeys instead.
+ *
+ * *number_groupby_pathkeys gets set to the number of elements in the returned
+ * list that belong to the GROUP BY clause. Any elements above this number
+ * must belong to ORDER BY / DISTINCT aggregates.
+ *
+ * When the best pathkeys are found we also mark each Aggref that can use
+ * those pathkeys as aggpresorted = true.
+ */
+static List *
+make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
+ int *number_groupby_pathkeys)
+{
+ List *grouppathkeys = NIL;
+ List *bestpathkeys;
+ Bitmapset *bestaggs;
+ Bitmapset *unprocessed_aggs;
+ ListCell *lc;
+ int i;
+
+ Assert(groupClause != NIL || root->numOrderedAggs > 0);
+
+ if (groupClause != NIL)
+ {
+ /* no pathkeys possible if there's an unsortable GROUP BY */
+ if (!grouping_is_sortable(groupClause))
+ {
+ *number_groupby_pathkeys = 0;
+ return NIL;
+ }
+
+ grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
+ tlist);
+ *number_groupby_pathkeys = list_length(grouppathkeys);
+ }
+ else
+ *number_groupby_pathkeys = 0;
+
+ /*
+ * We can't add pathkeys for ordered aggregates if there are any grouping
+ * sets. All handling specific to ordered aggregates must be done by the
+ * executor in that case.
+ */
+ if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+ return grouppathkeys;
+
+ /*
+ * Make a first pass over all AggInfos to collect a Bitmapset containing
+ * the indexes of all AggInfos to be processed below.
+ */
+ unprocessed_aggs = NULL;
+ foreach(lc, root->agginfos)
+ {
+ AggInfo *agginfo = lfirst_node(AggInfo, lc);
+ Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+ if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+ continue;
+
+ /* only add aggregates with a DISTINCT or ORDER BY */
+ if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
+ unprocessed_aggs = bms_add_member(unprocessed_aggs,
+ foreach_current_index(lc));
+ }
+
+ /*
+ * Now process all the unprocessed_aggs to find the best set of pathkeys
+ * for the given set of aggregates.
+ *
+ * On the first outer loop here 'bestaggs' will be empty. We'll populate
+ * this during the first loop using the pathkeys for the very first
+ * AggInfo then taking any stronger pathkeys from any other AggInfos with
+ * a more strict set of compatible pathkeys. Once the outer loop is
+ * complete, we mark off all the aggregates with compatible pathkeys then
+ * remove those from the unprocessed_aggs and repeat the process to try to
+ * find another set of pathkeys that are suitable for a larger number of
+ * aggregates. The outer loop will stop when there are not enough
+ * unprocessed aggregates for it to be possible to find a set of pathkeys
+ * to suit a larger number of aggregates.
+ */
+ bestpathkeys = NIL;
+ bestaggs = NULL;
+ while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
+ {
+ Bitmapset *aggindexes = NULL;
+ List *currpathkeys = NIL;
+
+ i = -1;
+ while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
+ {
+ AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+ Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
+ List *sortlist;
+
+ if (aggref->aggdistinct != NIL)
+ sortlist = aggref->aggdistinct;
+ else
+ sortlist = aggref->aggorder;
+
+ /*
+ * When not set yet, take the pathkeys from the first unprocessed
+ * aggregate.
+ */
+ if (currpathkeys == NIL)
+ {
+ currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+ aggref->args);
+
+ /* include the GROUP BY pathkeys, if they exist */
+ if (grouppathkeys != NIL)
+ currpathkeys = append_pathkeys(list_copy(grouppathkeys),
+ currpathkeys);
+
+ /* record that we found pathkeys for this aggregate */
+ aggindexes = bms_add_member(aggindexes, i);
+ }
+ else
+ {
+ List *pathkeys;
+
+ /* now look for a stronger set of matching pathkeys */
+ pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+ aggref->args);
+
+ /* include the GROUP BY pathkeys, if they exist */
+ if (grouppathkeys != NIL)
+ pathkeys = append_pathkeys(list_copy(grouppathkeys),
+ pathkeys);
+
+ /* are 'pathkeys' compatible or better than 'currpathkeys'? */
+ switch (compare_pathkeys(currpathkeys, pathkeys))
+ {
+ case PATHKEYS_BETTER2:
+ /* 'pathkeys' are stronger, use these ones instead */
+ currpathkeys = pathkeys;
+ /* FALLTHROUGH */
+
+ case PATHKEYS_BETTER1:
+ /* 'pathkeys' are less strict */
+ /* FALLTHROUGH */
+
+ case PATHKEYS_EQUAL:
+ /* mark this aggregate as covered by 'currpathkeys' */
+ aggindexes = bms_add_member(aggindexes, i);
+ break;
+
+ case PATHKEYS_DIFFERENT:
+ break;
+ }
+ }
+ }
+
+ /* remove the aggregates that we've just processed */
+ unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
+
+ /*
+ * If this pass included more aggregates than the previous best then
+ * use these ones as the best set.
+ */
+ if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
+ {
+ bestaggs = aggindexes;
+ bestpathkeys = currpathkeys;
+ }
+ }
+
+ /*
+ * Now that we've found the best set of aggregates we can set the
+ * presorted flag to indicate to the executor that it needn't bother
+ * performing a sort for these Aggrefs. We're able to do this now as
+ * there's no chance of a Hash Aggregate plan as create_grouping_paths
+ * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
+ * of ordered aggregates.
+ */
+ i = -1;
+ while ((i = bms_next_member(bestaggs, i)) >= 0)
+ {
+ AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+
+ foreach(lc, agginfo->aggrefs)
+ {
+ Aggref *aggref = lfirst_node(Aggref, lc);
+
+ aggref->aggpresorted = true;
+ }
+ }
+
+ /*
+ * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
+ * aggregates, then return bestpathkeys, otherwise return the
+ * grouppathkeys.
+ */
+ if (bestpathkeys != NIL)
+ return bestpathkeys;
+
+ return grouppathkeys;
+}
+
+/*
* Compute query_pathkeys and other pathkeys during plan generation
*/
static void
@@ -3137,18 +3349,19 @@ standard_qp_callback(PlannerInfo *root, void *extra)
List *activeWindows = qp_extra->activeWindows;
/*
- * Calculate pathkeys that represent grouping/ordering requirements. The
- * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
- * be, in which case we just leave their pathkeys empty.
+ * Calculate pathkeys that represent grouping/ordering and/or ordered
+ * aggregate requirements.
*/
- if (qp_extra->groupClause &&
- grouping_is_sortable(qp_extra->groupClause))
- root->group_pathkeys =
- make_pathkeys_for_sortclauses(root,
- qp_extra->groupClause,
- tlist);
+ if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
+ root->group_pathkeys = make_pathkeys_for_groupagg(root,
+ qp_extra->groupClause,
+ tlist,
+ &root->num_groupby_pathkeys);
else
+ {
root->group_pathkeys = NIL;
+ root->num_groupby_pathkeys = 0;
+ }
/* We consider only the first (bottom) window in pathkeys logic */
if (activeWindows != NIL)
@@ -6222,6 +6435,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
if (can_sort)
{
+ List *group_pathkeys;
+ List *orderAggPathkeys;
+ int numAggPathkeys;
+
+ numAggPathkeys = list_length(root->group_pathkeys) -
+ root->num_groupby_pathkeys;
+
+ if (numAggPathkeys > 0)
+ {
+ group_pathkeys = list_copy_head(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ }
+ else
+ {
+ group_pathkeys = root->group_pathkeys;
+ orderAggPathkeys = NIL;
+ }
+
/*
* Use any available suitably-sorted path as input, and also consider
* sorting the cheapest-total path.
@@ -6234,7 +6467,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
List *pathkey_orderings = NIL;
- List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
@@ -6242,7 +6474,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
path->rows,
path->pathkeys,
group_pathkeys,
- group_clauses);
+ group_clauses,
+ orderAggPathkeys);
Assert(list_length(pathkey_orderings) > 0);
@@ -6414,7 +6647,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
path->rows,
path->pathkeys,
group_pathkeys,
- group_clauses);
+ group_clauses,
+ orderAggPathkeys);
Assert(list_length(pathkey_orderings) > 0);
@@ -6717,6 +6951,26 @@ create_partial_grouping_paths(PlannerInfo *root,
if (can_sort && cheapest_total_path != NULL)
{
+ List *group_pathkeys;
+ List *orderAggPathkeys;
+ int numAggPathkeys;
+
+ numAggPathkeys = list_length(root->group_pathkeys) -
+ root->num_groupby_pathkeys;
+
+ if (numAggPathkeys > 0)
+ {
+ group_pathkeys = list_copy_head(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ }
+ else
+ {
+ group_pathkeys = root->group_pathkeys;
+ orderAggPathkeys = NIL;
+ }
+
/* This should have been checked previously */
Assert(parse->hasAggs || parse->groupClause);
@@ -6729,10 +6983,7 @@ create_partial_grouping_paths(PlannerInfo *root,
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_save = path;
-
List *pathkey_orderings = NIL;
-
- List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
@@ -6740,7 +6991,8 @@ create_partial_grouping_paths(PlannerInfo *root,
path->rows,
path->pathkeys,
group_pathkeys,
- group_clauses);
+ group_clauses,
+ orderAggPathkeys);
Assert(list_length(pathkey_orderings) > 0);
@@ -6856,16 +7108,33 @@ create_partial_grouping_paths(PlannerInfo *root,
if (can_sort && cheapest_partial_path != NULL)
{
+ List *group_pathkeys;
+ List *orderAggPathkeys;
+ int numAggPathkeys;
+
+ numAggPathkeys = list_length(root->group_pathkeys) -
+ root->num_groupby_pathkeys;
+
+ if (numAggPathkeys > 0)
+ {
+ group_pathkeys = list_copy_head(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+ root->num_groupby_pathkeys);
+ }
+ else
+ {
+ group_pathkeys = root->group_pathkeys;
+ orderAggPathkeys = NIL;
+ }
+
/* Similar to above logic, but for partial paths. */
foreach(lc, input_rel->partial_pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
-
List *pathkey_orderings = NIL;
-
- List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
@@ -6873,7 +7142,8 @@ create_partial_grouping_paths(PlannerInfo *root,
path->rows,
path->pathkeys,
group_pathkeys,
- group_clauses);
+ group_clauses,
+ orderAggPathkeys);
Assert(list_length(pathkey_orderings) > 0);
diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c
index 5b12937ead..da89b55402 100644
--- a/src/backend/optimizer/prep/prepagg.c
+++ b/src/backend/optimizer/prep/prepagg.c
@@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
{
AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, aggno);
+ agginfo->aggrefs = lappend(agginfo->aggrefs, aggref);
transno = agginfo->transno;
}
else
@@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
AggInfo *agginfo = makeNode(AggInfo);
agginfo->finalfn_oid = aggfinalfn;
- agginfo->representative_aggref = aggref;
+ agginfo->aggrefs = list_make1(aggref);
agginfo->shareable = shareable;
aggno = list_length(root->agginfos);
@@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg,
aggno++;
- existingRef = agginfo->representative_aggref;
+ existingRef = linitial_node(Aggref, agginfo->aggrefs);
/* all of the following must be the same or it's no match */
if (newagg->inputcollid != existingRef->inputcollid ||
@@ -648,7 +649,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs
foreach(lc, root->agginfos)
{
AggInfo *agginfo = lfirst_node(AggInfo, lc);
- Aggref *aggref = agginfo->representative_aggref;
+ Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
/*
* Add the appropriate component function execution costs to
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 059cb7097c..fabb5f7207 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -3816,6 +3816,7 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor,
aggref->aggstar = false;
aggref->aggvariadic = false;
aggref->aggkind = AGGKIND_NORMAL;
+ aggref->aggpresorted = false;
/* agglevelsup will be set by transformAggregateCall */
aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
aggref->location = agg_ctor->location;
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index f71a682cd6..827989f379 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -774,6 +774,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
aggref->aggstar = agg_star;
aggref->aggvariadic = func_variadic;
aggref->aggkind = aggkind;
+ aggref->aggpresorted = false;
/* agglevelsup will be set by transformAggregateCall */
aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
aggref->aggno = -1; /* planner will set aggno and aggtransno */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 9c254dafa7..e51026cb55 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202207291
+#define CATALOG_VERSION_NO 202208021
#endif
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 1e3f1bbee8..0739b389f3 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -258,6 +258,8 @@ typedef enum ExprEvalOp
EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
EEOP_AGG_PLAIN_TRANS_BYREF,
+ EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+ EEOP_AGG_PRESORTED_DISTINCT_MULTI,
EEOP_AGG_ORDERED_TRANS_DATUM,
EEOP_AGG_ORDERED_TRANS_TUPLE,
@@ -659,6 +661,17 @@ typedef struct ExprEvalStep
int jumpnull;
} agg_plain_pergroup_nullcheck;
+ /* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */
+ struct
+ {
+ AggStatePerTrans pertrans;
+ ExprContext *aggcontext;
+ int setno;
+ int transno;
+ int setoff;
+ int jumpdistinct;
+ } agg_presorted_distinctcheck;
+
/* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */
/* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */
struct
@@ -868,6 +881,10 @@ extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggS
extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
Datum newValue, bool newValueIsNull,
Datum oldValue, bool oldValueIsNull);
+extern bool ExecEvalPreOrderedDistinctSingle(AggState *aggstate,
+ AggStatePerTrans pertrans);
+extern bool ExecEvalPreOrderedDistinctMulti(AggState *aggstate,
+ AggStatePerTrans pertrans);
extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op,
ExprContext *econtext);
extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op,
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 4d1bd92999..1229c9d1ab 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -49,6 +49,11 @@ typedef struct AggStatePerTransData
bool aggshared;
/*
+ * True for ORDER BY and DISTINCT Aggrefs that are not aggpresorted.
+ */
+ bool aggsortrequired;
+
+ /*
* Number of aggregated input columns. This includes ORDER BY expressions
* in both the plain-agg and ordered-set cases. Ordered-set direct args
* are not counted, though.
@@ -136,6 +141,9 @@ typedef struct AggStatePerTransData
TupleTableSlot *sortslot; /* current input tuple */
TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */
TupleDesc sortdesc; /* descriptor of input tuples */
+ Datum lastdatum; /* used for single-column DISTINCT */
+ bool lastisnull; /* used for single-column DISTINCT */
+ bool haslast; /* got a last value for DISTINCT check */
/*
* These values are working state that is initialized at the start of an
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index e2081db4ed..3b065139e6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -365,6 +365,14 @@ struct PlannerInfo
/* groupClause pathkeys, if any */
List *group_pathkeys;
+
+ /*
+ * The number of elements in the group_pathkeys list which belong to the
+ * GROUP BY clause. Additional ones belong to ORDER BY / DISTINCT
+ * aggregates.
+ */
+ int num_groupby_pathkeys;
+
/* pathkeys of bottom window, if any */
List *window_pathkeys;
/* distinctClause pathkeys, if any */
@@ -3134,12 +3142,12 @@ typedef struct AggInfo
NodeTag type;
/*
- * Link to an Aggref expr this state value is for.
+ * List of Aggref exprs that this state value is for.
*
- * There can be multiple identical Aggref's sharing the same per-agg. This
- * points to the first one of them.
+ * There will always be at least one, but there can be multiple identical
+ * Aggref's sharing the same per-agg.
*/
- Aggref *representative_aggref;
+ List *aggrefs;
/* Transition state number for this aggregate */
int transno;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 1fc2fbffa3..3aa96bb685 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -357,6 +357,10 @@ typedef struct Param
* replaced with a single argument representing the partial-aggregate
* transition values.
*
+ * aggpresorted is set by the query planner for ORDER BY and DISTINCT
+ * aggregates where the chosen plan provides presorted input for this
+ * aggregate during execution.
+ *
* aggsplit indicates the expected partial-aggregation mode for the Aggref's
* parent plan node. It's always set to AGGSPLIT_SIMPLE in the parser, but
* the planner might change it to something else. We use this mainly as
@@ -422,6 +426,9 @@ typedef struct Aggref
/* aggregate kind (see pg_aggregate.h) */
char aggkind;
+ /* aggregate input already sorted */
+ bool aggpresorted pg_node_attr(equal_ignore);
+
/* > 0 if agg belongs to outer query */
Index agglevelsup;
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 54ab709c67..d11cdac7f8 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -208,7 +208,8 @@ extern int group_keys_reorder_by_pathkeys(List *pathkeys,
List **group_clauses);
extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
List *path_pathkeys,
- List *group_pathkeys, List *group_clauses);
+ List *group_pathkeys, List *group_clauses,
+ List *aggregate_pathkeys);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,
@@ -255,6 +256,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
RelOptInfo *rel,
List *pathkeys);
extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
+extern List *append_pathkeys(List *target, List *source);
extern PathKey *make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 601047fa3d..b2198724e3 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1393,6 +1393,84 @@ LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1;
^
drop table t1, t2;
--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+-- Ensure we order by four. This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: four
+ -> Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we order by two. It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+ sum(two order by two), max(four order by four),
+ min(four order by four), max(two order by two)
+from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: two
+ -> Seq Scan on tenk1
+(4 rows)
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+ max(four order by four), sum(two order by two),
+ min(four order by four), max(two order by two)
+from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: four
+ -> Seq Scan on tenk1
+(4 rows)
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+ max(four order by four), sum(two order by two),
+ min(four order by four), max(two order by two),
+ sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+ QUERY PLAN
+-------------------------------
+ Aggregate
+ -> Sort
+ Sort Key: ten
+ -> Seq Scan on tenk1
+(4 rows)
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause. We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+ sum(unique1 order by ten, two), sum(unique1 order by four),
+ sum(unique1 order by two, four)
+from tenk1
+group by ten;
+ QUERY PLAN
+----------------------------------
+ GroupAggregate
+ Group Key: ten
+ -> Sort
+ Sort Key: ten, two, four
+ -> Seq Scan on tenk1
+(5 rows)
+
+--
-- Test combinations of DISTINCT and/or ORDER BY
--
select array_agg(a order by b)
@@ -2263,9 +2341,9 @@ NOTICE: avg_transfn called with 3
-- shouldn't share states due to the distinctness not matching.
select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
NOTICE: avg_transfn called with 1
-NOTICE: avg_transfn called with 3
NOTICE: avg_transfn called with 1
NOTICE: avg_transfn called with 3
+NOTICE: avg_transfn called with 3
my_avg | my_sum
--------+--------
2 | 4
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a08a3825ff..4b41ccf1aa 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2;
-> GroupAggregate
Group Key: pagg_tab.c
-> Sort
- Sort Key: pagg_tab.c
+ Sort Key: pagg_tab.c, pagg_tab.a
-> Seq Scan on pagg_tab_p1 pagg_tab
-> GroupAggregate
Group Key: pagg_tab_1.c
-> Sort
- Sort Key: pagg_tab_1.c
+ Sort Key: pagg_tab_1.c, pagg_tab_1.a
-> Seq Scan on pagg_tab_p2 pagg_tab_1
-> GroupAggregate
Group Key: pagg_tab_2.c
-> Sort
- Sort Key: pagg_tab_2.c
+ Sort Key: pagg_tab_2.c, pagg_tab_2.a
-> Seq Scan on pagg_tab_p3 pagg_tab_2
(18 rows)
@@ -948,34 +948,36 @@ SET max_parallel_workers_per_gather TO 2;
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
- QUERY PLAN
---------------------------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
- -> Append
- -> GroupAggregate
- Group Key: pagg_tab_ml.a
- Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml.a
- -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
- -> GroupAggregate
- Group Key: pagg_tab_ml_2.a
- Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_2.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
- -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
- -> GroupAggregate
- Group Key: pagg_tab_ml_5.a
- Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_5.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
- -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+ QUERY PLAN
+--------------------------------------------------------------------------------------------
+ Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+ -> Parallel Append
+ -> GroupAggregate
+ Group Key: pagg_tab_ml.a
+ Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+ -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_5.a
+ Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+ -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_2.a
+ Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+ -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(27 rows)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
a | sum | array_agg | count
@@ -994,32 +996,34 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
-- Without ORDER BY clause, to test Gather at top-most path
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
- QUERY PLAN
----------------------------------------------------------------------
- Append
- -> GroupAggregate
- Group Key: pagg_tab_ml.a
- Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml.a
- -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
- -> GroupAggregate
- Group Key: pagg_tab_ml_2.a
- Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_2.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
- -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
- -> GroupAggregate
- Group Key: pagg_tab_ml_5.a
- Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_5.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
- -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Gather
+ Workers Planned: 2
+ -> Parallel Append
+ -> GroupAggregate
+ Group Key: pagg_tab_ml.a
+ Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+ -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_5.a
+ Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+ -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_2.a
+ Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+ -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out
index bdd0969a50..748dfdb04d 100644
--- a/src/test/regress/expected/sqljson.out
+++ b/src/test/regress/expected/sqljson.out
@@ -866,14 +866,14 @@ FROM
(VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar);
json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg
-----------------+-----------------+-----------------+-----------------+-----------------------------------------+-----------------------------------------+-----------------+--------------------------------------------------------------------------------------------------------------------------+---------------+--------------------------------------
- [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [null, 3, 1, null, null, 5, 2, 4, null] | [null, 3, 1, null, null, 5, 2, 4, null] | [{"bar":null}, +| [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}] | [{"bar":3}, +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
- | | | | | | {"bar":3}, +| | {"bar":4}, +|
- | | | | | | {"bar":1}, +| | {"bar":5}] |
+ [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5, null, null, null, null] | [1, 2, 3, 4, 5, null, null, null, null] | [{"bar":1}, +| [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}] | [{"bar":3}, +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
+ | | | | | | {"bar":2}, +| | {"bar":4}, +|
+ | | | | | | {"bar":3}, +| | {"bar":5}] |
+ | | | | | | {"bar":4}, +| | |
+ | | | | | | {"bar":5}, +| | |
+ | | | | | | {"bar":null}, +| | |
| | | | | | {"bar":null}, +| | |
| | | | | | {"bar":null}, +| | |
- | | | | | | {"bar":5}, +| | |
- | | | | | | {"bar":2}, +| | |
- | | | | | | {"bar":4}, +| | |
| | | | | | {"bar":null}] | | |
(1 row)
diff --git a/src/test/regress/expected/tuplesort.out b/src/test/regress/expected/tuplesort.out
index 418f296a3f..a2efa179fc 100644
--- a/src/test/regress/expected/tuplesort.out
+++ b/src/test/regress/expected/tuplesort.out
@@ -622,15 +622,18 @@ EXPLAIN (COSTS OFF) :qry;
-> GroupAggregate
Group Key: a.col12
Filter: (count(*) > 1)
- -> Merge Join
- Merge Cond: (a.col12 = b.col12)
- -> Sort
- Sort Key: a.col12 DESC
- -> Seq Scan on test_mark_restore a
- -> Sort
- Sort Key: b.col12 DESC
- -> Seq Scan on test_mark_restore b
-(14 rows)
+ -> Incremental Sort
+ Sort Key: a.col12 DESC, a.col1
+ Presorted Key: a.col12
+ -> Merge Join
+ Merge Cond: (a.col12 = b.col12)
+ -> Sort
+ Sort Key: a.col12 DESC
+ -> Seq Scan on test_mark_restore a
+ -> Sort
+ Sort Key: b.col12 DESC
+ -> Seq Scan on test_mark_restore b
+(17 rows)
:qry;
col12 | count | count | count | count | count
@@ -658,15 +661,18 @@ EXPLAIN (COSTS OFF) :qry;
-> GroupAggregate
Group Key: a.col12
Filter: (count(*) > 1)
- -> Merge Join
- Merge Cond: (a.col12 = b.col12)
- -> Sort
- Sort Key: a.col12 DESC
- -> Seq Scan on test_mark_restore a
- -> Sort
- Sort Key: b.col12 DESC
- -> Seq Scan on test_mark_restore b
-(14 rows)
+ -> Incremental Sort
+ Sort Key: a.col12 DESC, a.col1
+ Presorted Key: a.col12
+ -> Merge Join
+ Merge Cond: (a.col12 = b.col12)
+ -> Sort
+ Sort Key: a.col12 DESC
+ -> Seq Scan on test_mark_restore a
+ -> Sort
+ Sort Key: b.col12 DESC
+ -> Seq Scan on test_mark_restore b
+(17 rows)
:qry;
col12 | count | count | count | count | count
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index c6e0d7ba2b..4540a06f45 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -504,6 +504,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1;
drop table t1, t2;
--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+
+-- Ensure we order by four. This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+
+-- Ensure we order by two. It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+ sum(two order by two), max(four order by four),
+ min(four order by four), max(two order by two)
+from tenk1;
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+ max(four order by four), sum(two order by two),
+ min(four order by four), max(two order by two)
+from tenk1;
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+ max(four order by four), sum(two order by two),
+ min(four order by four), max(two order by two),
+ sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause. We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+ sum(unique1 order by ten, two), sum(unique1 order by four),
+ sum(unique1 order by two, four)
+from tenk1
+group by ten;
+
+--
-- Test combinations of DISTINCT and/or ORDER BY
--