diff options
Diffstat (limited to 'src/test/regress/expected/stats_ext.out')
-rw-r--r-- | src/test/regress/expected/stats_ext.out | 1340 |
1 files changed, 1283 insertions, 57 deletions
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 07af1e29e3..60e9bfcd78 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -25,7 +25,7 @@ begin end; $$; -- Verify failures -CREATE TABLE ext_stats_test (x int, y int, z int); +CREATE TABLE ext_stats_test (x text, y int, z int); CREATE STATISTICS tst; ERROR: syntax error at or near ";" LINE 1: CREATE STATISTICS tst; @@ -44,12 +44,25 @@ CREATE STATISTICS tst ON a, b FROM ext_stats_test; ERROR: column "a" does not exist CREATE STATISTICS tst ON x, x, y FROM ext_stats_test; ERROR: duplicate column name in statistics definition -CREATE STATISTICS tst ON x + y FROM ext_stats_test; -ERROR: only simple column references are allowed in CREATE STATISTICS -CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -ERROR: only simple column references are allowed in CREATE STATISTICS +CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; +ERROR: cannot have more than 8 columns in statistics +CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test; +ERROR: duplicate expression in statistics definition CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; ERROR: unrecognized statistics kind "unrecognized" +-- incorrect expressions +CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses +ERROR: syntax error at or near "+" +LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test; + ^ +CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression +ERROR: syntax error at or near "," +LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; + ^ DROP TABLE ext_stats_test; -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); @@ -79,7 +92,7 @@ ALTER TABLE ab1 DROP COLUMN a; b | integer | | | c | integer | | | Statistics objects: - "public"."ab1_b_c_stats" (ndistinct, dependencies, mcv) ON b, c FROM ab1 + "public"."ab1_b_c_stats" ON b, c FROM ab1 -- Ensure statistics are dropped when table is SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; @@ -111,7 +124,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; a | integer | | | b | integer | | | Statistics objects: - "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1; STATISTICS 0 + "public"."ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0 ANALYZE ab1; SELECT stxname, stxdndistinct, stxddependencies, stxdmcv @@ -131,7 +144,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; a | integer | | | | plain | | b | integer | | | | plain | | Statistics objects: - "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1 + "public"."ab1_a_b_stats" ON a, b FROM ab1 -- partial analyze doesn't build stats either ANALYZE ab1 (a); @@ -150,6 +163,39 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; ANALYZE ab1; DROP TABLE ab1 CASCADE; NOTICE: drop cascades to table ab1c +-- basic test for statistics on expressions +CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ); +-- expression stats may be built on a single expression column +CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1; +-- with a single expression, we only enable expression statistics +CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1; +SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2'; + stxkind +--------- + {e} +(1 row) + +-- adding anything to the expression builds all statistics kinds +CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1; +SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3'; + stxkind +----------- + {d,f,m,e} +(1 row) + +-- date_trunc on timestamptz is not immutable, but that should not matter +CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1; +-- date_trunc on timestamp is immutable +CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1; +-- insert some data and run analyze, to test that these cases build properly +INSERT INTO ab1 +SELECT + generate_series(1,10), + generate_series(1,10), + generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'), + generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day'); +ANALYZE ab1; +DROP TABLE ab1; -- Verify supported object types for extended statistics CREATE schema tststats; CREATE TABLE tststats.t (a int, b int, c text); @@ -244,6 +290,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c 200 | 11 (1 row) +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 11 +(1 row) + -- correct command CREATE STATISTICS s10 ON a, b, c FROM ndistinct; ANALYZE ndistinct; @@ -282,6 +352,32 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b 11 | 11 (1 row) +-- partial improvement (match on attributes) +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +-- expressions - no improvement +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 11 | 11 +(1 row) + -- last two plans keep using Group Aggregate, because 'd' is not covered -- by the statistic and while it's NULL-only we assume 200 values for it SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d'); @@ -343,6 +439,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d 200 | 13 (1 row) +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + DROP STATISTICS s10; SELECT s.stxkind, d.stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d @@ -383,6 +503,413 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d 200 | 13 (1 row) +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +-- ndistinct estimates with statistics on expressions +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct; +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+------------------------------------------------------------------- + {d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +DROP STATISTICS s10; +-- a mix of attributes and expressions +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 100 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)'); + estimated | actual +-----------+-------- + 100 | 247 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)'); + estimated | actual +-----------+-------- + 100 | 1000 +(1 row) + +CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct; +ANALYZE ndistinct; +SELECT s.stxkind, d.stxdndistinct + FROM pg_statistic_ext s, pg_statistic_ext_data d + WHERE s.stxrelid = 'ndistinct'::regclass + AND d.stxoid = s.oid; + stxkind | stxdndistinct +---------+------------------------------------------------------------- + {d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 221 | 221 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)'); + estimated | actual +-----------+-------- + 247 | 247 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)'); + estimated | actual +-----------+-------- + 1000 | 1000 +(1 row) + +DROP STATISTICS s10; +-- combination of multiple ndistinct statistics, with/without expressions +TRUNCATE ndistinct; +-- two mostly independent groups of columns +INSERT INTO ndistinct (a, b, c, d) + SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20) + FROM generate_series(1,1000) s(i); +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 27 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- basic statistics on both attributes (no expressions) +CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct; +CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the second statistics by statistics on expressions +DROP STATISTICS s12; +CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the second statistics by statistics on both attributes and expressions +DROP STATISTICS s12; +CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace the other statistics by statistics on both attributes and expressions +DROP STATISTICS s11; +CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +-- replace statistics by somewhat overlapping ones (this expected to get worse estimate +-- because the first statistics shall be applied to 3 columns, and the second one can't +-- be really applied) +DROP STATISTICS s11; +DROP STATISTICS s12; +CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct; +CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct; +ANALYZE ndistinct; +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)'); + estimated | actual +-----------+-------- + 9 | 9 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c'); + estimated | actual +-----------+-------- + 45 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)'); + estimated | actual +-----------+-------- + 100 | 45 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)'); + estimated | actual +-----------+-------- + 100 | 180 +(1 row) + +DROP STATISTICS s11; +DROP STATISTICS s12; -- functional dependencies tests CREATE TABLE functional_dependencies ( filler1 TEXT, @@ -430,6 +957,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE -- a => b, a => c, b => c TRUNCATE functional_dependencies; DROP STATISTICS func_deps_stat; +-- now do the same thing, but with expressions +INSERT INTO functional_dependencies (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1'); + estimated | actual +-----------+-------- + 1 | 35 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1'); + estimated | actual +-----------+-------- + 1 | 5 +(1 row) + +-- create statistics +CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies; +ANALYZE functional_dependencies; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1'); + estimated | actual +-----------+-------- + 35 | 35 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1'); + estimated | actual +-----------+-------- + 5 | 5 +(1 row) + +-- a => b, a => c, b => c +TRUNCATE functional_dependencies; +DROP STATISTICS func_deps_stat; INSERT INTO functional_dependencies (a, b, c, filler1) SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); ANALYZE functional_dependencies; @@ -755,14 +1316,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 1 | 0 (1 row) --- changing the type of column c causes its single-column stats to be dropped, --- giving a default estimate of 0.005 * 5000 = 25 for (c = 1); check multiple --- clauses estimated with functional dependencies does not exceed this +-- changing the type of column c causes all its stats to be dropped, reverting +-- to default estimates without any statistics, i.e. 0.5% selectivity for each +-- condition ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric; SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); estimated | actual -----------+-------- - 25 | 50 + 1 | 50 (1 row) ANALYZE functional_dependencies; @@ -772,6 +1333,332 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 50 | 50 (1 row) +DROP STATISTICS func_deps_stat; +-- now try functional dependencies with expressions +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)'); + estimated | actual +-----------+-------- + 1 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])'); + estimated | actual +-----------+-------- + 1 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])'); + estimated | actual +-----------+-------- + 1 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +-- the estimates however improve thanks to having expression statistics +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X'''); + estimated | actual +-----------+-------- + 926 | 1900 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1543 | 2250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 2229 | 2050 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +-- create statistics on expressions +CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), (b || 'X'), (c + 1) FROM functional_dependencies; +ANALYZE functional_dependencies; +-- print the detected dependencies +SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; + dependencies +------------------------------------------------------------------------------------------------------------------------ + {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000} +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +-- IN +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')'); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')'); + estimated | actual +-----------+-------- + 197 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + +-- ANY +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])'); + estimated | actual +-----------+-------- + 400 | 400 +(1 row) + +-- ANY with inequalities should not benefit from functional dependencies +-- the estimates however improve thanks to having expression statistics +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X'''); + estimated | actual +-----------+-------- + 1957 | 1900 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 2933 | 2250 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 3548 | 2050 +(1 row) + +-- ALL (should not benefit from functional dependencies) +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])'); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + -- check the ability to use multiple functional dependencies CREATE TABLE functional_dependencies_multi ( a INTEGER, @@ -896,6 +1783,39 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = 1 | 1 (1 row) +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +-- random data (no MCV list), but with expression +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,1000) s(i); +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1'); + estimated | actual +-----------+-------- + 1 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1'); + estimated | actual +-----------+-------- + 13 | 13 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + -- 100 distinct combinations, all in the MCV list TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; @@ -1121,6 +2041,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); estimated | actual -----------+-------- @@ -1207,6 +2133,212 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = 50 | 50 (1 row) +-- 100 distinct combinations, all in the MCV list, but with expressions +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT i, i, i, i FROM generate_series(1,1000) s(i); +ANALYZE mcv_lists; +-- without any stats on the expressions, we have to use default selectivities, which +-- is why the estimates here are different from the pre-computed case above +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 111 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 111 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 15 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 11 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats) +CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 5 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 1 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 149 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 20 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 20 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 116 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 12 | 100 +(1 row) + +DROP STATISTICS mcv_lists_stats_1; +DROP STATISTICS mcv_lists_stats_2; +DROP STATISTICS mcv_lists_stats_3; +-- create statistics with both MCV and expressions +CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists; +ANALYZE mcv_lists; +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1'); + estimated | actual +-----------+-------- + 50 | 50 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 105 | 120 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + -- 100 distinct combinations with NULL values, all in the MCV list TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; @@ -1712,6 +2844,100 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR (1 row) DROP TABLE mcv_lists_multi; +-- statistics on integer expressions +CREATE TABLE expr_stats (a int, b int, c int); +INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +DROP STATISTICS expr_stats_1; +DROP TABLE expr_stats; +-- statistics on a mix columns and expressions +CREATE TABLE expr_stats (a int, b int, c int); +INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +DROP TABLE expr_stats; +-- statistics on expressions with different data types +CREATE TABLE expr_stats (a int, b name, c text); +INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i); +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0'''); + estimated | actual +-----------+-------- + 11 | 100 +(1 row) + +CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats; +ANALYZE expr_stats; +SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0'''); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +DROP TABLE expr_stats; -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in -- the underlying table. @@ -1743,21 +2969,21 @@ create statistics stts_s2.stts_yama (dependencies, mcv) on col1, col3 from stts_ insert into stts_t1 select i,i from generate_series(1,100) i; analyze stts_t1; \dX - List of extended statistics - Schema | Name | Definition | Ndistinct | Dependencies | MCV -----------+------------------------+--------------------------------------+-----------+--------------+--------- - public | func_deps_stat | a, b, c FROM functional_dependencies | | defined | - public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined - public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined - public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined - public | stts_1 | a, b FROM stts_t1 | defined | | - public | stts_2 | a, b FROM stts_t1 | defined | defined | - public | stts_3 | a, b FROM stts_t1 | defined | defined | defined - public | stts_4 | b, c FROM stts_t2 | defined | defined | defined - public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined - stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined - stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined - tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined + tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined (12 rows) \dX stts_? @@ -1778,21 +3004,21 @@ analyze stts_t1; (1 row) \dX+ - List of extended statistics - Schema | Name | Definition | Ndistinct | Dependencies | MCV -----------+------------------------+--------------------------------------+-----------+--------------+--------- - public | func_deps_stat | a, b, c FROM functional_dependencies | | defined | - public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined - public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined - public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined - public | stts_1 | a, b FROM stts_t1 | defined | | - public | stts_2 | a, b FROM stts_t1 | defined | defined | - public | stts_3 | a, b FROM stts_t1 | defined | defined | defined - public | stts_4 | b, c FROM stts_t2 | defined | defined | defined - public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined - stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined - stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined - tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined + tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined (12 rows) \dX+ stts_? @@ -1822,21 +3048,21 @@ analyze stts_t1; create role regress_stats_ext nosuperuser; set role regress_stats_ext; \dX - List of extended statistics - Schema | Name | Definition | Ndistinct | Dependencies | MCV -----------+------------------------+--------------------------------------+-----------+--------------+--------- - public | func_deps_stat | a, b, c FROM functional_dependencies | | defined | - public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined - public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined - public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined - public | stts_1 | a, b FROM stts_t1 | defined | | - public | stts_2 | a, b FROM stts_t1 | defined | defined | - public | stts_3 | a, b FROM stts_t1 | defined | defined | defined - public | stts_4 | b, c FROM stts_t2 | defined | defined | defined - public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined - stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined - stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined - tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined + List of extended statistics + Schema | Name | Definition | Ndistinct | Dependencies | MCV +----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+--------- + public | func_deps_stat | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies | | defined | + public | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays | | | defined + public | mcv_lists_bool_stats | a, b, c FROM mcv_lists_bool | | | defined + public | mcv_lists_stats | a, b, d FROM mcv_lists | | | defined + public | stts_1 | a, b FROM stts_t1 | defined | | + public | stts_2 | a, b FROM stts_t1 | defined | defined | + public | stts_3 | a, b FROM stts_t1 | defined | defined | defined + public | stts_4 | b, c FROM stts_t2 | defined | defined | defined + public | stts_hoge | col1, col2, col3 FROM stts_t3 | defined | defined | defined + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + stts_s2 | stts_yama | col1, col3 FROM stts_t3 | | defined | defined + tststats | priv_test_stats | a, b FROM tststats.priv_test_tbl | | | defined (12 rows) reset role; |