From be45be9c33a85e72cdaeb9967e9f6d2d00199e09 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 18 Mar 2021 17:45:38 +0100 Subject: Implement GROUP BY DISTINCT With grouping sets, it's possible that some of the grouping sets are duplicate. This is especially common with CUBE and ROLLUP clauses. For example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to GROUP BY GROUPING SETS ( (a, b, c), (a, b, c), (a, b, c), (a, b), (a, b), (a, b), (a), (a), (a), (c, a), (c, a), (c, a), (c), (b, c), (b), () ) Some of the grouping sets are calculated multiple times, which is mostly unnecessary. This commit implements a new GROUP BY DISTINCT feature, as defined in the SQL standard, which eliminates the duplicate sets. Author: Vik Fearing Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org --- src/test/regress/expected/groupingsets.out | 111 +++++++++++++++++++++++++++++ src/test/regress/sql/groupingsets.sql | 26 +++++++ 2 files changed, 137 insertions(+) (limited to 'src/test/regress') diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 7c844c6e09..4c467c1b15 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -1929,4 +1929,115 @@ set work_mem to default; drop table gs_group_1; drop table gs_hash_1; +-- GROUP BY DISTINCT +-- "normal" behavior... +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by all rollup(a, b), rollup(a, c) +order by a, b, c; + a | b | c +---+---+--- + 1 | 2 | 3 + 1 | 2 | + 1 | 2 | + 1 | | 3 + 1 | | 3 + 1 | | + 1 | | + 1 | | + 4 | | 6 + 4 | | 6 + 4 | | 6 + 4 | | + 4 | | + 4 | | + 4 | | + 4 | | + 7 | 8 | 9 + 7 | 8 | + 7 | 8 | + 7 | | 9 + 7 | | 9 + 7 | | + 7 | | + 7 | | + | | +(25 rows) + +-- ...which is also the default +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by rollup(a, b), rollup(a, c) +order by a, b, c; + a | b | c +---+---+--- + 1 | 2 | 3 + 1 | 2 | + 1 | 2 | + 1 | | 3 + 1 | | 3 + 1 | | + 1 | | + 1 | | + 4 | | 6 + 4 | | 6 + 4 | | 6 + 4 | | + 4 | | + 4 | | + 4 | | + 4 | | + 7 | 8 | 9 + 7 | 8 | + 7 | 8 | + 7 | | 9 + 7 | | 9 + 7 | | + 7 | | + 7 | | + | | +(25 rows) + +-- "group by distinct" behavior... +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by distinct rollup(a, b), rollup(a, c) +order by a, b, c; + a | b | c +---+---+--- + 1 | 2 | 3 + 1 | 2 | + 1 | | 3 + 1 | | + 4 | | 6 + 4 | | 6 + 4 | | + 4 | | + 7 | 8 | 9 + 7 | 8 | + 7 | | 9 + 7 | | + | | +(13 rows) + +-- ...which is not the same as "select distinct" +select distinct a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by rollup(a, b), rollup(a, c) +order by a, b, c; + a | b | c +---+---+--- + 1 | 2 | 3 + 1 | 2 | + 1 | | 3 + 1 | | + 4 | | 6 + 4 | | + 7 | 8 | 9 + 7 | 8 | + 7 | | 9 + 7 | | + | | +(11 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 18ae803e9d..3944944704 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -529,4 +529,30 @@ set work_mem to default; drop table gs_group_1; drop table gs_hash_1; +-- GROUP BY DISTINCT + +-- "normal" behavior... +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by all rollup(a, b), rollup(a, c) +order by a, b, c; + +-- ...which is also the default +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by rollup(a, b), rollup(a, c) +order by a, b, c; + +-- "group by distinct" behavior... +select a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by distinct rollup(a, b), rollup(a, c) +order by a, b, c; + +-- ...which is not the same as "select distinct" +select distinct a, b, c +from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c) +group by rollup(a, b), rollup(a, c) +order by a, b, c; + -- end -- cgit v1.2.1