summaryrefslogtreecommitdiff
path: root/src/test/regress/expected/stats_ext.out
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/regress/expected/stats_ext.out')
-rw-r--r--src/test/regress/expected/stats_ext.out1340
1 files changed, 1283 insertions, 57 deletions
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 07af1e29e3..60e9bfcd78 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -25,7 +25,7 @@ begin
end;
$$;
-- Verify failures
-CREATE TABLE ext_stats_test (x int, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int);
CREATE STATISTICS tst;
ERROR: syntax error at or near ";"
LINE 1: CREATE STATISTICS tst;
@@ -44,12 +44,25 @@ CREATE STATISTICS tst ON a, b FROM ext_stats_test;
ERROR: column "a" does not exist
CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
ERROR: duplicate column name in statistics definition
-CREATE STATISTICS tst ON x + y FROM ext_stats_test;
-ERROR: only simple column references are allowed in CREATE STATISTICS
-CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
-ERROR: only simple column references are allowed in CREATE STATISTICS
+CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test;
+ERROR: cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+ERROR: cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+ERROR: cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
+ERROR: duplicate expression in statistics definition
CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
ERROR: unrecognized statistics kind "unrecognized"
+-- incorrect expressions
+CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
+ERROR: syntax error at or near "+"
+LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test;
+ ^
+CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
+ERROR: syntax error at or near ","
+LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
+ ^
DROP TABLE ext_stats_test;
-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
@@ -79,7 +92,7 @@ ALTER TABLE ab1 DROP COLUMN a;
b | integer | | |
c | integer | | |
Statistics objects:
- "public"."ab1_b_c_stats" (ndistinct, dependencies, mcv) ON b, c FROM ab1
+ "public"."ab1_b_c_stats" ON b, c FROM ab1
-- Ensure statistics are dropped when table is
SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
@@ -111,7 +124,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
a | integer | | |
b | integer | | |
Statistics objects:
- "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1; STATISTICS 0
+ "public"."ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
ANALYZE ab1;
SELECT stxname, stxdndistinct, stxddependencies, stxdmcv
@@ -131,7 +144,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
a | integer | | | | plain | |
b | integer | | | | plain | |
Statistics objects:
- "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1
+ "public"."ab1_a_b_stats" ON a, b FROM ab1
-- partial analyze doesn't build stats either
ANALYZE ab1 (a);
@@ -150,6 +163,39 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
ANALYZE ab1;
DROP TABLE ab1 CASCADE;
NOTICE: drop cascades to table ab1c
+-- basic test for statistics on expressions
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
+-- expression stats may be built on a single expression column
+CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1;
+-- with a single expression, we only enable expression statistics
+CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2';
+ stxkind
+---------
+ {e}
+(1 row)
+
+-- adding anything to the expression builds all statistics kinds
+CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
+ stxkind
+-----------
+ {d,f,m,e}
+(1 row)
+
+-- date_trunc on timestamptz is not immutable, but that should not matter
+CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
+-- date_trunc on timestamp is immutable
+CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
+-- insert some data and run analyze, to test that these cases build properly
+INSERT INTO ab1
+SELECT
+ generate_series(1,10),
+ generate_series(1,10),
+ generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
+ generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+ANALYZE ab1;
+DROP TABLE ab1;
-- Verify supported object types for extended statistics
CREATE schema tststats;
CREATE TABLE tststats.t (a int, b int, c text);
@@ -244,6 +290,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
200 | 11
(1 row)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
-- correct command
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct;
@@ -282,6 +352,32 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
11 | 11
(1 row)
+-- partial improvement (match on attributes)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
+-- expressions - no improvement
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
-- last two plans keep using Group Aggregate, because 'd' is not covered
-- by the statistic and while it's NULL-only we assume 200 values for it
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
@@ -343,6 +439,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
200 | 13
(1 row)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 1000 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
DROP STATISTICS s10;
SELECT s.stxkind, d.stxdndistinct
FROM pg_statistic_ext s, pg_statistic_ext_data d
@@ -383,6 +503,413 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
200 | 13
(1 row)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 100 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+-- ndistinct estimates with statistics on expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 100 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT s.stxkind, d.stxdndistinct
+ FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+ AND d.stxoid = s.oid;
+ stxkind | stxdndistinct
+---------+-------------------------------------------------------------------
+ {d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual
+-----------+--------
+ 1000 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
+DROP STATISTICS s10;
+-- a mix of attributes and expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 100 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+ estimated | actual
+-----------+--------
+ 100 | 247
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+ estimated | actual
+-----------+--------
+ 100 | 1000
+(1 row)
+
+CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT s.stxkind, d.stxdndistinct
+ FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+ AND d.stxoid = s.oid;
+ stxkind | stxdndistinct
+---------+-------------------------------------------------------------
+ {d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 221 | 221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+ estimated | actual
+-----------+--------
+ 247 | 247
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+ estimated | actual
+-----------+--------
+ 1000 | 1000
+(1 row)
+
+DROP STATISTICS s10;
+-- combination of multiple ndistinct statistics, with/without expressions
+TRUNCATE ndistinct;
+-- two mostly independent groups of columns
+INSERT INTO ndistinct (a, b, c, d)
+ SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20)
+ FROM generate_series(1,1000) s(i);
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 27 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 27 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 27 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 27 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 100 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 100 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+-- basic statistics on both attributes (no expressions)
+CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct;
+CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+-- replace the second statistics by statistics on expressions
+DROP STATISTICS s12;
+CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+-- replace the second statistics by statistics on both attributes and expressions
+DROP STATISTICS s12;
+CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+-- replace the other statistics by statistics on both attributes and expressions
+DROP STATISTICS s11;
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+-- replace statistics by somewhat overlapping ones (this expected to get worse estimate
+-- because the first statistics shall be applied to 3 columns, and the second one can't
+-- be really applied)
+DROP STATISTICS s11;
+DROP STATISTICS s12;
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual
+-----------+--------
+ 9 | 9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual
+-----------+--------
+ 45 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual
+-----------+--------
+ 100 | 45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual
+-----------+--------
+ 100 | 180
+(1 row)
+
+DROP STATISTICS s11;
+DROP STATISTICS s12;
-- functional dependencies tests
CREATE TABLE functional_dependencies (
filler1 TEXT,
@@ -430,6 +957,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
-- a => b, a => c, b => c
TRUNCATE functional_dependencies;
DROP STATISTICS func_deps_stat;
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+ SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 5
+(1 row)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual
+-----------+--------
+ 35 | 35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual
+-----------+--------
+ 5 | 5
+(1 row)
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
INSERT INTO functional_dependencies (a, b, c, filler1)
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies;
@@ -755,14 +1316,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
1 | 0
(1 row)
--- changing the type of column c causes its single-column stats to be dropped,
--- giving a default estimate of 0.005 * 5000 = 25 for (c = 1); check multiple
--- clauses estimated with functional dependencies does not exceed this
+-- changing the type of column c causes all its stats to be dropped, reverting
+-- to default estimates without any statistics, i.e. 0.5% selectivity for each
+-- condition
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
estimated | actual
-----------+--------
- 25 | 50
+ 1 | 50
(1 row)
ANALYZE functional_dependencies;
@@ -772,6 +1333,332 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
50 | 50
(1 row)
+DROP STATISTICS func_deps_stat;
+-- now try functional dependencies with expressions
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+ estimated | actual
+-----------+--------
+ 1 | 400
+(1 row)
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+ estimated | actual
+-----------+--------
+ 1 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+ estimated | actual
+-----------+--------
+ 1 | 400
+(1 row)
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+ estimated | actual
+-----------+--------
+ 926 | 1900
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1543 | 2250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 2229 | 2050
+(1 row)
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+-- create statistics on expressions
+CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), (b || 'X'), (c + 1) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+-- print the detected dependencies
+SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat';
+ dependencies
+------------------------------------------------------------------------------------------------------------------------
+ {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+ estimated | actual
+-----------+--------
+ 400 | 400
+(1 row)
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 99 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual
+-----------+--------
+ 99 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual
+-----------+--------
+ 197 | 200
+(1 row)
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 3 | 100
+(1 row)
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+ estimated | actual
+-----------+--------
+ 400 | 400
+(1 row)
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+ estimated | actual
+-----------+--------
+ 1957 | 1900
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 2933 | 2250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 3548 | 2050
+(1 row)
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+ estimated | actual
+-----------+--------
+ 2 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
-- check the ability to use multiple functional dependencies
CREATE TABLE functional_dependencies_multi (
a INTEGER,
@@ -896,6 +1783,39 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
1 | 1
(1 row)
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 13
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+ estimated | actual
+-----------+--------
+ 13 | 13
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
+
-- 100 distinct combinations, all in the MCV list
TRUNCATE mcv_lists;
DROP STATISTICS mcv_lists_stats;
@@ -1121,6 +2041,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
200 | 200
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
estimated | actual
-----------+--------
@@ -1207,6 +2133,212 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
50 | 50
(1 row)
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+ SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+ANALYZE mcv_lists;
+-- without any stats on the expressions, we have to use default selectivities, which
+-- is why the estimates here are different from the pre-computed case above
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual
+-----------+--------
+ 111 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 111 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 15 | 120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual
+-----------+--------
+ 11 | 150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats)
+CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual
+-----------+--------
+ 5 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 5 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual
+-----------+--------
+ 5 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 5 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 149 | 120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual
+-----------+--------
+ 20 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual
+-----------+--------
+ 20 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual
+-----------+--------
+ 116 | 150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual
+-----------+--------
+ 12 | 100
+(1 row)
+
+DROP STATISTICS mcv_lists_stats_1;
+DROP STATISTICS mcv_lists_stats_2;
+DROP STATISTICS mcv_lists_stats_3;
+-- create statistics with both MCV and expressions
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 105 | 120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual
+-----------+--------
+ 150 | 150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
-- 100 distinct combinations with NULL values, all in the MCV list
TRUNCATE mcv_lists;
DROP STATISTICS mcv_lists_stats;
@@ -1712,6 +2844,100 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
(1 row)
DROP TABLE mcv_lists_multi;
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+DROP TABLE expr_stats;
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual
+-----------+--------
+ 11 | 100
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual
+-----------+--------
+ 100 | 100
+(1 row)
+
+DROP TABLE expr_stats;
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in
-- the underlying table.
@@ -1743,21 +2969,21 @@ create statistics stts_s2.stts_yama (dependencies, mcv) on col1, col3 from stts_
insert into stts_t1 select i,i from generate_series(1,100) i;
analyze stts_t1;
\dX
- List of extended statistics
- Schema | Name | Definition | Ndistinct | Dependencies | MCV
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public | func_deps_stat | a, b, c FROM functional_dependencies | | defined |
- public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
- public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
- public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
- public | stts_1 | a, b FROM stts_t1 | defined | |
- public | stts_2 | a, b FROM stts_t1 | defined | defined |
- public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
- public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
- public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
- stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
- stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
- tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
+ List of extended statistics
+ Schema | Name | Definition | Ndistinct | Dependencies | MCV
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined |
+ public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
+ public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
+ public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
+ public | stts_1 | a, b FROM stts_t1 | defined | |
+ public | stts_2 | a, b FROM stts_t1 | defined | defined |
+ public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
+ public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
+ public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
+ stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
+ stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
+ tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
(12 rows)
\dX stts_?
@@ -1778,21 +3004,21 @@ analyze stts_t1;
(1 row)
\dX+
- List of extended statistics
- Schema | Name | Definition | Ndistinct | Dependencies | MCV
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public | func_deps_stat | a, b, c FROM functional_dependencies | | defined |
- public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
- public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
- public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
- public | stts_1 | a, b FROM stts_t1 | defined | |
- public | stts_2 | a, b FROM stts_t1 | defined | defined |
- public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
- public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
- public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
- stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
- stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
- tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
+ List of extended statistics
+ Schema | Name | Definition | Ndistinct | Dependencies | MCV
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined |
+ public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
+ public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
+ public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
+ public | stts_1 | a, b FROM stts_t1 | defined | |
+ public | stts_2 | a, b FROM stts_t1 | defined | defined |
+ public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
+ public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
+ public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
+ stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
+ stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
+ tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
(12 rows)
\dX+ stts_?
@@ -1822,21 +3048,21 @@ analyze stts_t1;
create role regress_stats_ext nosuperuser;
set role regress_stats_ext;
\dX
- List of extended statistics
- Schema | Name | Definition | Ndistinct | Dependencies | MCV
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public | func_deps_stat | a, b, c FROM functional_dependencies | | defined |
- public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
- public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
- public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
- public | stts_1 | a, b FROM stts_t1 | defined | |
- public | stts_2 | a, b FROM stts_t1 | defined | defined |
- public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
- public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
- public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
- stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
- stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
- tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
+ List of extended statistics
+ Schema | Name | Definition | Ndistinct | Dependencies | MCV
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined |
+ public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined
+ public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined
+ public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined
+ public | stts_1 | a, b FROM stts_t1 | defined | |
+ public | stts_2 | a, b FROM stts_t1 | defined | defined |
+ public | stts_3 | a, b FROM stts_t1 | defined | defined | defined
+ public | stts_4 | b, c FROM stts_t2 | defined | defined | defined
+ public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined
+ stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined
+ stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined
+ tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined
(12 rows)
reset role;