summaryrefslogtreecommitdiff
path: root/src/test/regress
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2021-03-26 13:35:29 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2021-03-26 13:35:32 +0100
commit77b88cd1bb9041a735f24072150cacfa06c699a3 (patch)
treebe9ca84d673e3aa17e0e75ec579be414ae7eac18 /src/test/regress
parenta681e3c107aa97eb554f118935c4d2278892c3dd (diff)
downloadpostgresql-77b88cd1bb9041a735f24072150cacfa06c699a3.tar.gz
BRIN bloom indexes
Adds a BRIN opclass using a Bloom filter to summarize the range. Indexes using the new opclasses allow only equality queries (similar to hash indexes), but that works fine for data like UUID, MAC addresses etc. for which range queries are not very common. This also means the indexes work for data that is not well correlated to physical location within the table, or perhaps even entirely random (which is a common issue with existing BRIN minmax opclasses). It's possible to specify opclass parameters with the usual Bloom filter parameters, i.e. the desired false-positive rate and the expected number of distinct values per page range. CREATE TABLE t (a int); CREATE INDEX ON t USING brin (a int4_bloom_ops(false_positive_rate = 0.05, n_distinct_per_range = 100)); The opclasses do not operate on the indexed values directly, but compute a 32-bit hash first, and the Bloom filter is built on the hash value. Collisions should not be a huge issue though, as the number of distinct values in a page ranges is usually fairly small. Bump catversion, due to various catalog changes. Author: Tomas Vondra <tomas.vondra@postgresql.org> Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com> Reviewed-by: Sokolov Yura <y.sokolov@postgrespro.ru> Reviewed-by: Nico Williams <nico@cryptonector.com> Reviewed-by: John Naylor <john.naylor@enterprisedb.com> Discussion: https://postgr.es/m/c1138ead-7668-f0e1-0638-c3be3237e812@2ndquadrant.com Discussion: https://postgr.es/m/5d78b774-7e9c-c94e-12cf-fef51cc89b1a%402ndquadrant.com
Diffstat (limited to 'src/test/regress')
-rw-r--r--src/test/regress/expected/brin_bloom.out428
-rw-r--r--src/test/regress/expected/opr_sanity.out3
-rw-r--r--src/test/regress/expected/psql.out3
-rw-r--r--src/test/regress/expected/type_sanity.out7
-rw-r--r--src/test/regress/parallel_schedule5
-rw-r--r--src/test/regress/serial_schedule1
-rw-r--r--src/test/regress/sql/brin_bloom.sql376
7 files changed, 818 insertions, 5 deletions
diff --git a/src/test/regress/expected/brin_bloom.out b/src/test/regress/expected/brin_bloom.out
new file mode 100644
index 0000000000..32c56a996a
--- /dev/null
+++ b/src/test/regress/expected/brin_bloom.out
@@ -0,0 +1,428 @@
+CREATE TABLE brintest_bloom (byteacol bytea,
+ charcol "char",
+ namecol name,
+ int8col bigint,
+ int2col smallint,
+ int4col integer,
+ textcol text,
+ oidcol oid,
+ float4col real,
+ float8col double precision,
+ macaddrcol macaddr,
+ inetcol inet,
+ cidrcol cidr,
+ bpcharcol character,
+ datecol date,
+ timecol time without time zone,
+ timestampcol timestamp without time zone,
+ timestamptzcol timestamp with time zone,
+ intervalcol interval,
+ timetzcol time with time zone,
+ numericcol numeric,
+ uuidcol uuid,
+ lsncol pg_lsn
+) WITH (fillfactor=10);
+INSERT INTO brintest_bloom SELECT
+ repeat(stringu1, 8)::bytea,
+ substr(stringu1, 1, 1)::"char",
+ stringu1::name, 142857 * tenthous,
+ thousand,
+ twothousand,
+ repeat(stringu1, 8),
+ unique1::oid,
+ (four + 1.0)/(hundred+1),
+ odd::float8 / (tenthous + 1),
+ format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
+ inet '10.2.3.4/24' + tenthous,
+ cidr '10.2.3/24' + tenthous,
+ substr(stringu1, 1, 1)::bpchar,
+ date '1995-08-15' + tenthous,
+ time '01:20:30' + thousand * interval '18.5 second',
+ timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
+ timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
+ justify_days(justify_hours(tenthous * interval '12 minutes')),
+ timetz '01:30:20+02' + hundred * interval '15 seconds',
+ tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
+ format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
+ format('%s/%s%s', odd, even, tenthous)::pg_lsn
+FROM tenk1 ORDER BY unique2 LIMIT 100;
+-- throw in some NULL's and different values
+INSERT INTO brintest_bloom (inetcol, cidrcol) SELECT
+ inet 'fe80::6e40:8ff:fea9:8c46' + tenthous,
+ cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous
+FROM tenk1 ORDER BY thousand, tenthous LIMIT 25;
+-- test bloom specific index options
+-- ndistinct must be >= -1.0
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(n_distinct_per_range = -1.1)
+);
+ERROR: value -1.1 out of bounds for option "n_distinct_per_range"
+DETAIL: Valid values are between "-1.000000" and "2147483647.000000".
+-- false_positive_rate must be between 0.0001 and 0.25
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(false_positive_rate = 0.00009)
+);
+ERROR: value 0.00009 out of bounds for option "false_positive_rate"
+DETAIL: Valid values are between "0.000100" and "0.250000".
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(false_positive_rate = 0.26)
+);
+ERROR: value 0.26 out of bounds for option "false_positive_rate"
+DETAIL: Valid values are between "0.000100" and "0.250000".
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops,
+ charcol char_bloom_ops,
+ namecol name_bloom_ops,
+ int8col int8_bloom_ops,
+ int2col int2_bloom_ops,
+ int4col int4_bloom_ops,
+ textcol text_bloom_ops,
+ oidcol oid_bloom_ops,
+ float4col float4_bloom_ops,
+ float8col float8_bloom_ops,
+ macaddrcol macaddr_bloom_ops,
+ inetcol inet_bloom_ops,
+ cidrcol inet_bloom_ops,
+ bpcharcol bpchar_bloom_ops,
+ datecol date_bloom_ops,
+ timecol time_bloom_ops,
+ timestampcol timestamp_bloom_ops,
+ timestamptzcol timestamptz_bloom_ops,
+ intervalcol interval_bloom_ops,
+ timetzcol timetz_bloom_ops,
+ numericcol numeric_bloom_ops,
+ uuidcol uuid_bloom_ops,
+ lsncol pg_lsn_bloom_ops
+) with (pages_per_range = 1);
+CREATE TABLE brinopers_bloom (colname name, typ text,
+ op text[], value text[], matches int[],
+ check (cardinality(op) = cardinality(value)),
+ check (cardinality(op) = cardinality(matches)));
+INSERT INTO brinopers_bloom VALUES
+ ('byteacol', 'bytea',
+ '{=}',
+ '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
+ '{1}'),
+ ('charcol', '"char"',
+ '{=}',
+ '{M}',
+ '{6}'),
+ ('namecol', 'name',
+ '{=}',
+ '{MAAAAA}',
+ '{2}'),
+ ('int2col', 'int2',
+ '{=}',
+ '{800}',
+ '{1}'),
+ ('int4col', 'int4',
+ '{=}',
+ '{800}',
+ '{1}'),
+ ('int8col', 'int8',
+ '{=}',
+ '{1257141600}',
+ '{1}'),
+ ('textcol', 'text',
+ '{=}',
+ '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
+ '{1}'),
+ ('oidcol', 'oid',
+ '{=}',
+ '{8800}',
+ '{1}'),
+ ('float4col', 'float4',
+ '{=}',
+ '{1}',
+ '{4}'),
+ ('float8col', 'float8',
+ '{=}',
+ '{0}',
+ '{1}'),
+ ('macaddrcol', 'macaddr',
+ '{=}',
+ '{2c:00:2d:00:16:00}',
+ '{2}'),
+ ('inetcol', 'inet',
+ '{=}',
+ '{10.2.14.231/24}',
+ '{1}'),
+ ('inetcol', 'cidr',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('cidrcol', 'inet',
+ '{=}',
+ '{10.2.14/24}',
+ '{2}'),
+ ('cidrcol', 'inet',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('cidrcol', 'cidr',
+ '{=}',
+ '{10.2.14/24}',
+ '{2}'),
+ ('cidrcol', 'cidr',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('bpcharcol', 'bpchar',
+ '{=}',
+ '{W}',
+ '{6}'),
+ ('datecol', 'date',
+ '{=}',
+ '{2009-12-01}',
+ '{1}'),
+ ('timecol', 'time',
+ '{=}',
+ '{02:28:57}',
+ '{1}'),
+ ('timestampcol', 'timestamp',
+ '{=}',
+ '{1964-03-24 19:26:45}',
+ '{1}'),
+ ('timestamptzcol', 'timestamptz',
+ '{=}',
+ '{1972-10-19 09:00:00-07}',
+ '{1}'),
+ ('intervalcol', 'interval',
+ '{=}',
+ '{1 mons 13 days 12:24}',
+ '{1}'),
+ ('timetzcol', 'timetz',
+ '{=}',
+ '{01:35:50+02}',
+ '{2}'),
+ ('numericcol', 'numeric',
+ '{=}',
+ '{2268164.347826086956521739130434782609}',
+ '{1}'),
+ ('uuidcol', 'uuid',
+ '{=}',
+ '{52225222-5222-5222-5222-522252225222}',
+ '{1}'),
+ ('lsncol', 'pg_lsn',
+ '{=, IS, IS NOT}',
+ '{44/455222, NULL, NULL}',
+ '{1, 25, 100}');
+DO $x$
+DECLARE
+ r record;
+ r2 record;
+ cond text;
+ idx_ctids tid[];
+ ss_ctids tid[];
+ count int;
+ plan_ok bool;
+ plan_line text;
+BEGIN
+ FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers_bloom, unnest(op) WITH ORDINALITY AS oper LOOP
+
+ -- prepare the condition
+ IF r.value IS NULL THEN
+ cond := format('%I %s %L', r.colname, r.oper, r.value);
+ ELSE
+ cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ);
+ END IF;
+
+ -- run the query using the brin index
+ SET enable_seqscan = 0;
+ SET enable_bitmapscan = 1;
+
+ plan_ok := false;
+ FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
+ IF plan_line LIKE '%Bitmap Heap Scan on brintest_bloom%' THEN
+ plan_ok := true;
+ END IF;
+ END LOOP;
+ IF NOT plan_ok THEN
+ RAISE WARNING 'did not get bitmap indexscan plan for %', r;
+ END IF;
+
+ EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
+ INTO idx_ctids;
+
+ -- run the query using a seqscan
+ SET enable_seqscan = 1;
+ SET enable_bitmapscan = 0;
+
+ plan_ok := false;
+ FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
+ IF plan_line LIKE '%Seq Scan on brintest_bloom%' THEN
+ plan_ok := true;
+ END IF;
+ END LOOP;
+ IF NOT plan_ok THEN
+ RAISE WARNING 'did not get seqscan plan for %', r;
+ END IF;
+
+ EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
+ INTO ss_ctids;
+
+ -- make sure both return the same results
+ count := array_length(idx_ctids, 1);
+
+ IF NOT (count = array_length(ss_ctids, 1) AND
+ idx_ctids @> ss_ctids AND
+ idx_ctids <@ ss_ctids) THEN
+ -- report the results of each scan to make the differences obvious
+ RAISE WARNING 'something not right in %: count %', r, count;
+ SET enable_seqscan = 1;
+ SET enable_bitmapscan = 0;
+ FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
+ RAISE NOTICE 'seqscan: %', r2;
+ END LOOP;
+
+ SET enable_seqscan = 0;
+ SET enable_bitmapscan = 1;
+ FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
+ RAISE NOTICE 'bitmapscan: %', r2;
+ END LOOP;
+ END IF;
+
+ -- make sure we found expected number of matches
+ IF count != r.matches THEN RAISE WARNING 'unexpected number of results % for %', count, r; END IF;
+ END LOOP;
+END;
+$x$;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+INSERT INTO brintest_bloom SELECT
+ repeat(stringu1, 42)::bytea,
+ substr(stringu1, 1, 1)::"char",
+ stringu1::name, 142857 * tenthous,
+ thousand,
+ twothousand,
+ repeat(stringu1, 42),
+ unique1::oid,
+ (four + 1.0)/(hundred+1),
+ odd::float8 / (tenthous + 1),
+ format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
+ inet '10.2.3.4' + tenthous,
+ cidr '10.2.3/24' + tenthous,
+ substr(stringu1, 1, 1)::bpchar,
+ date '1995-08-15' + tenthous,
+ time '01:20:30' + thousand * interval '18.5 second',
+ timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
+ timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
+ justify_days(justify_hours(tenthous * interval '12 minutes')),
+ timetz '01:30:20' + hundred * interval '15 seconds',
+ tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
+ format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
+ format('%s/%s%s', odd, even, tenthous)::pg_lsn
+FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+ brin_desummarize_range
+------------------------
+
+(1 row)
+
+VACUUM brintest_bloom; -- force a summarization cycle in brinidx
+UPDATE brintest_bloom SET int8col = int8col * int4col;
+UPDATE brintest_bloom SET textcol = '' WHERE textcol IS NOT NULL;
+-- Tests for brin_summarize_new_values
+SELECT brin_summarize_new_values('brintest_bloom'); -- error, not an index
+ERROR: "brintest_bloom" is not an index
+SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
+ERROR: "tenk1_unique1" is not a BRIN index
+SELECT brin_summarize_new_values('brinidx_bloom'); -- ok, no change expected
+ brin_summarize_new_values
+---------------------------
+ 0
+(1 row)
+
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx_bloom', -1); -- error, invalid range
+ERROR: block number out of range: -1
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+ brin_desummarize_range
+------------------------
+
+(1 row)
+
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+ brin_desummarize_range
+------------------------
+
+(1 row)
+
+SELECT brin_desummarize_range('brinidx_bloom', 100000000);
+ brin_desummarize_range
+------------------------
+
+(1 row)
+
+-- Test brin_summarize_range
+CREATE TABLE brin_summarize_bloom (
+ value int
+) WITH (fillfactor=10, autovacuum_enabled=false);
+CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2);
+-- Fill a few pages
+DO $$
+DECLARE curtid tid;
+BEGIN
+ LOOP
+ INSERT INTO brin_summarize_bloom VALUES (1) RETURNING ctid INTO curtid;
+ EXIT WHEN curtid > tid '(2, 0)';
+ END LOOP;
+END;
+$$;
+-- summarize one range
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 0);
+ brin_summarize_range
+----------------------
+ 0
+(1 row)
+
+-- nothing: already summarized
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 1);
+ brin_summarize_range
+----------------------
+ 0
+(1 row)
+
+-- summarize one range
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 2);
+ brin_summarize_range
+----------------------
+ 1
+(1 row)
+
+-- nothing: page doesn't exist in table
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967295);
+ brin_summarize_range
+----------------------
+ 0
+(1 row)
+
+-- invalid block number values
+SELECT brin_summarize_range('brin_summarize_bloom_idx', -1);
+ERROR: block number out of range: -1
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967296);
+ERROR: block number out of range: 4294967296
+-- test brin cost estimates behave sanely based on correlation of values
+CREATE TABLE brin_test_bloom (a INT, b INT);
+INSERT INTO brin_test_bloom SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
+CREATE INDEX brin_test_bloom_a_idx ON brin_test_bloom USING brin (a) WITH (pages_per_range = 2);
+CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2);
+VACUUM ANALYZE brin_test_bloom;
+-- Ensure brin index is used when columns are perfectly correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE a = 1;
+ QUERY PLAN
+--------------------------------------------------
+ Bitmap Heap Scan on brin_test_bloom
+ Recheck Cond: (a = 1)
+ -> Bitmap Index Scan on brin_test_bloom_a_idx
+ Index Cond: (a = 1)
+(4 rows)
+
+-- Ensure brin index is not used when values are not correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE b = 1;
+ QUERY PLAN
+-----------------------------
+ Seq Scan on brin_test_bloom
+ Filter: (b = 1)
+(2 rows)
+
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 254ca06d3d..ef4b4444b9 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -2037,6 +2037,7 @@ ORDER BY 1, 2, 3;
2742 | 16 | @@
3580 | 1 | <
3580 | 1 | <<
+ 3580 | 1 | =
3580 | 2 | &<
3580 | 2 | <=
3580 | 3 | &&
@@ -2100,7 +2101,7 @@ ORDER BY 1, 2, 3;
4000 | 28 | ^@
4000 | 29 | <^
4000 | 30 | >^
-(123 rows)
+(124 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 7204fdb0b4..a7a5b22d4f 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -4993,8 +4993,9 @@ List of access methods
List of operator classes
AM | Input type | Storage type | Operator class | Default?
------+------------+--------------+----------------+----------
+ brin | oid | | oid_bloom_ops | no
brin | oid | | oid_minmax_ops | yes
-(1 row)
+(2 rows)
\dAf spgist
List of operator families
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 0c74dc96a8..48ce3f7411 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -67,13 +67,14 @@ WHERE p1.typtype not in ('p') AND p1.typname NOT LIKE E'\\_%'
WHERE p2.typname = ('_' || p1.typname)::name AND
p2.typelem = p1.oid and p1.typarray = p2.oid)
ORDER BY p1.oid;
- oid | typname
-------+-----------------
+ oid | typname
+------+-----------------------
194 | pg_node_tree
3361 | pg_ndistinct
3402 | pg_dependencies
+ 4600 | pg_brin_bloom_summary
5017 | pg_mcv_list
-(4 rows)
+(5 rows)
-- Make sure typarray points to a "true" array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b3605db88c..8150d7190f 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -78,6 +78,11 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi
test: brin gin gist spgist privileges init_privs security_label collate matview lock replica_identity rowsecurity object_address tablesample groupingsets drop_operator password identity generated join_hash
# ----------
+# Additional BRIN tests
+# ----------
+test: brin_bloom
+
+# ----------
# Another group of parallel tests
# ----------
test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan tidrangescan collate.icu.utf8 incremental_sort
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 15ec7548e3..f440f5fa17 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -108,6 +108,7 @@ test: delete
test: namespace
test: prepared_xacts
test: brin
+test: brin_bloom
test: gin
test: gist
test: spgist
diff --git a/src/test/regress/sql/brin_bloom.sql b/src/test/regress/sql/brin_bloom.sql
new file mode 100644
index 0000000000..5d499208e3
--- /dev/null
+++ b/src/test/regress/sql/brin_bloom.sql
@@ -0,0 +1,376 @@
+CREATE TABLE brintest_bloom (byteacol bytea,
+ charcol "char",
+ namecol name,
+ int8col bigint,
+ int2col smallint,
+ int4col integer,
+ textcol text,
+ oidcol oid,
+ float4col real,
+ float8col double precision,
+ macaddrcol macaddr,
+ inetcol inet,
+ cidrcol cidr,
+ bpcharcol character,
+ datecol date,
+ timecol time without time zone,
+ timestampcol timestamp without time zone,
+ timestamptzcol timestamp with time zone,
+ intervalcol interval,
+ timetzcol time with time zone,
+ numericcol numeric,
+ uuidcol uuid,
+ lsncol pg_lsn
+) WITH (fillfactor=10);
+
+INSERT INTO brintest_bloom SELECT
+ repeat(stringu1, 8)::bytea,
+ substr(stringu1, 1, 1)::"char",
+ stringu1::name, 142857 * tenthous,
+ thousand,
+ twothousand,
+ repeat(stringu1, 8),
+ unique1::oid,
+ (four + 1.0)/(hundred+1),
+ odd::float8 / (tenthous + 1),
+ format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
+ inet '10.2.3.4/24' + tenthous,
+ cidr '10.2.3/24' + tenthous,
+ substr(stringu1, 1, 1)::bpchar,
+ date '1995-08-15' + tenthous,
+ time '01:20:30' + thousand * interval '18.5 second',
+ timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
+ timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
+ justify_days(justify_hours(tenthous * interval '12 minutes')),
+ timetz '01:30:20+02' + hundred * interval '15 seconds',
+ tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
+ format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
+ format('%s/%s%s', odd, even, tenthous)::pg_lsn
+FROM tenk1 ORDER BY unique2 LIMIT 100;
+
+-- throw in some NULL's and different values
+INSERT INTO brintest_bloom (inetcol, cidrcol) SELECT
+ inet 'fe80::6e40:8ff:fea9:8c46' + tenthous,
+ cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous
+FROM tenk1 ORDER BY thousand, tenthous LIMIT 25;
+
+-- test bloom specific index options
+-- ndistinct must be >= -1.0
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(n_distinct_per_range = -1.1)
+);
+-- false_positive_rate must be between 0.0001 and 0.25
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(false_positive_rate = 0.00009)
+);
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops(false_positive_rate = 0.26)
+);
+
+CREATE INDEX brinidx_bloom ON brintest_bloom USING brin (
+ byteacol bytea_bloom_ops,
+ charcol char_bloom_ops,
+ namecol name_bloom_ops,
+ int8col int8_bloom_ops,
+ int2col int2_bloom_ops,
+ int4col int4_bloom_ops,
+ textcol text_bloom_ops,
+ oidcol oid_bloom_ops,
+ float4col float4_bloom_ops,
+ float8col float8_bloom_ops,
+ macaddrcol macaddr_bloom_ops,
+ inetcol inet_bloom_ops,
+ cidrcol inet_bloom_ops,
+ bpcharcol bpchar_bloom_ops,
+ datecol date_bloom_ops,
+ timecol time_bloom_ops,
+ timestampcol timestamp_bloom_ops,
+ timestamptzcol timestamptz_bloom_ops,
+ intervalcol interval_bloom_ops,
+ timetzcol timetz_bloom_ops,
+ numericcol numeric_bloom_ops,
+ uuidcol uuid_bloom_ops,
+ lsncol pg_lsn_bloom_ops
+) with (pages_per_range = 1);
+
+CREATE TABLE brinopers_bloom (colname name, typ text,
+ op text[], value text[], matches int[],
+ check (cardinality(op) = cardinality(value)),
+ check (cardinality(op) = cardinality(matches)));
+
+INSERT INTO brinopers_bloom VALUES
+ ('byteacol', 'bytea',
+ '{=}',
+ '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
+ '{1}'),
+ ('charcol', '"char"',
+ '{=}',
+ '{M}',
+ '{6}'),
+ ('namecol', 'name',
+ '{=}',
+ '{MAAAAA}',
+ '{2}'),
+ ('int2col', 'int2',
+ '{=}',
+ '{800}',
+ '{1}'),
+ ('int4col', 'int4',
+ '{=}',
+ '{800}',
+ '{1}'),
+ ('int8col', 'int8',
+ '{=}',
+ '{1257141600}',
+ '{1}'),
+ ('textcol', 'text',
+ '{=}',
+ '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}',
+ '{1}'),
+ ('oidcol', 'oid',
+ '{=}',
+ '{8800}',
+ '{1}'),
+ ('float4col', 'float4',
+ '{=}',
+ '{1}',
+ '{4}'),
+ ('float8col', 'float8',
+ '{=}',
+ '{0}',
+ '{1}'),
+ ('macaddrcol', 'macaddr',
+ '{=}',
+ '{2c:00:2d:00:16:00}',
+ '{2}'),
+ ('inetcol', 'inet',
+ '{=}',
+ '{10.2.14.231/24}',
+ '{1}'),
+ ('inetcol', 'cidr',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('cidrcol', 'inet',
+ '{=}',
+ '{10.2.14/24}',
+ '{2}'),
+ ('cidrcol', 'inet',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('cidrcol', 'cidr',
+ '{=}',
+ '{10.2.14/24}',
+ '{2}'),
+ ('cidrcol', 'cidr',
+ '{=}',
+ '{fe80::6e40:8ff:fea9:8c46}',
+ '{1}'),
+ ('bpcharcol', 'bpchar',
+ '{=}',
+ '{W}',
+ '{6}'),
+ ('datecol', 'date',
+ '{=}',
+ '{2009-12-01}',
+ '{1}'),
+ ('timecol', 'time',
+ '{=}',
+ '{02:28:57}',
+ '{1}'),
+ ('timestampcol', 'timestamp',
+ '{=}',
+ '{1964-03-24 19:26:45}',
+ '{1}'),
+ ('timestamptzcol', 'timestamptz',
+ '{=}',
+ '{1972-10-19 09:00:00-07}',
+ '{1}'),
+ ('intervalcol', 'interval',
+ '{=}',
+ '{1 mons 13 days 12:24}',
+ '{1}'),
+ ('timetzcol', 'timetz',
+ '{=}',
+ '{01:35:50+02}',
+ '{2}'),
+ ('numericcol', 'numeric',
+ '{=}',
+ '{2268164.347826086956521739130434782609}',
+ '{1}'),
+ ('uuidcol', 'uuid',
+ '{=}',
+ '{52225222-5222-5222-5222-522252225222}',
+ '{1}'),
+ ('lsncol', 'pg_lsn',
+ '{=, IS, IS NOT}',
+ '{44/455222, NULL, NULL}',
+ '{1, 25, 100}');
+
+DO $x$
+DECLARE
+ r record;
+ r2 record;
+ cond text;
+ idx_ctids tid[];
+ ss_ctids tid[];
+ count int;
+ plan_ok bool;
+ plan_line text;
+BEGIN
+ FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers_bloom, unnest(op) WITH ORDINALITY AS oper LOOP
+
+ -- prepare the condition
+ IF r.value IS NULL THEN
+ cond := format('%I %s %L', r.colname, r.oper, r.value);
+ ELSE
+ cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ);
+ END IF;
+
+ -- run the query using the brin index
+ SET enable_seqscan = 0;
+ SET enable_bitmapscan = 1;
+
+ plan_ok := false;
+ FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
+ IF plan_line LIKE '%Bitmap Heap Scan on brintest_bloom%' THEN
+ plan_ok := true;
+ END IF;
+ END LOOP;
+ IF NOT plan_ok THEN
+ RAISE WARNING 'did not get bitmap indexscan plan for %', r;
+ END IF;
+
+ EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
+ INTO idx_ctids;
+
+ -- run the query using a seqscan
+ SET enable_seqscan = 1;
+ SET enable_bitmapscan = 0;
+
+ plan_ok := false;
+ FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP
+ IF plan_line LIKE '%Seq Scan on brintest_bloom%' THEN
+ plan_ok := true;
+ END IF;
+ END LOOP;
+ IF NOT plan_ok THEN
+ RAISE WARNING 'did not get seqscan plan for %', r;
+ END IF;
+
+ EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond)
+ INTO ss_ctids;
+
+ -- make sure both return the same results
+ count := array_length(idx_ctids, 1);
+
+ IF NOT (count = array_length(ss_ctids, 1) AND
+ idx_ctids @> ss_ctids AND
+ idx_ctids <@ ss_ctids) THEN
+ -- report the results of each scan to make the differences obvious
+ RAISE WARNING 'something not right in %: count %', r, count;
+ SET enable_seqscan = 1;
+ SET enable_bitmapscan = 0;
+ FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
+ RAISE NOTICE 'seqscan: %', r2;
+ END LOOP;
+
+ SET enable_seqscan = 0;
+ SET enable_bitmapscan = 1;
+ FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP
+ RAISE NOTICE 'bitmapscan: %', r2;
+ END LOOP;
+ END IF;
+
+ -- make sure we found expected number of matches
+ IF count != r.matches THEN RAISE WARNING 'unexpected number of results % for %', count, r; END IF;
+ END LOOP;
+END;
+$x$;
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
+INSERT INTO brintest_bloom SELECT
+ repeat(stringu1, 42)::bytea,
+ substr(stringu1, 1, 1)::"char",
+ stringu1::name, 142857 * tenthous,
+ thousand,
+ twothousand,
+ repeat(stringu1, 42),
+ unique1::oid,
+ (four + 1.0)/(hundred+1),
+ odd::float8 / (tenthous + 1),
+ format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr,
+ inet '10.2.3.4' + tenthous,
+ cidr '10.2.3/24' + tenthous,
+ substr(stringu1, 1, 1)::bpchar,
+ date '1995-08-15' + tenthous,
+ time '01:20:30' + thousand * interval '18.5 second',
+ timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours',
+ timestamptz '1972-10-10 03:00' + thousand * interval '1 hour',
+ justify_days(justify_hours(tenthous * interval '12 minutes')),
+ timetz '01:30:20' + hundred * interval '15 seconds',
+ tenthous::numeric(36,30) * fivethous * even / (hundred + 1),
+ format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid,
+ format('%s/%s%s', odd, even, tenthous)::pg_lsn
+FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
+
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+VACUUM brintest_bloom; -- force a summarization cycle in brinidx
+
+UPDATE brintest_bloom SET int8col = int8col * int4col;
+UPDATE brintest_bloom SET textcol = '' WHERE textcol IS NOT NULL;
+
+-- Tests for brin_summarize_new_values
+SELECT brin_summarize_new_values('brintest_bloom'); -- error, not an index
+SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
+SELECT brin_summarize_new_values('brinidx_bloom'); -- ok, no change expected
+
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx_bloom', -1); -- error, invalid range
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+SELECT brin_desummarize_range('brinidx_bloom', 0);
+SELECT brin_desummarize_range('brinidx_bloom', 100000000);
+
+-- Test brin_summarize_range
+CREATE TABLE brin_summarize_bloom (
+ value int
+) WITH (fillfactor=10, autovacuum_enabled=false);
+CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2);
+-- Fill a few pages
+DO $$
+DECLARE curtid tid;
+BEGIN
+ LOOP
+ INSERT INTO brin_summarize_bloom VALUES (1) RETURNING ctid INTO curtid;
+ EXIT WHEN curtid > tid '(2, 0)';
+ END LOOP;
+END;
+$$;
+
+-- summarize one range
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 0);
+-- nothing: already summarized
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 1);
+-- summarize one range
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 2);
+-- nothing: page doesn't exist in table
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967295);
+-- invalid block number values
+SELECT brin_summarize_range('brin_summarize_bloom_idx', -1);
+SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967296);
+
+
+-- test brin cost estimates behave sanely based on correlation of values
+CREATE TABLE brin_test_bloom (a INT, b INT);
+INSERT INTO brin_test_bloom SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
+CREATE INDEX brin_test_bloom_a_idx ON brin_test_bloom USING brin (a) WITH (pages_per_range = 2);
+CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2);
+VACUUM ANALYZE brin_test_bloom;
+
+-- Ensure brin index is used when columns are perfectly correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE a = 1;
+-- Ensure brin index is not used when values are not correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE b = 1;