From c8f78b616167bf8e24bc5dc69112c37755ed3058 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 18 Mar 2021 07:25:27 +0530 Subject: Add a new GUC and a reloption to enable inserts in parallel-mode. Commit 05c8482f7f added the implementation of parallel SELECT for "INSERT INTO ... SELECT ..." which may incur non-negligible overhead in the additional parallel-safety checks that it performs, even when, in the end, those checks determine that parallelism can't be used. This is normally only ever a problem in the case of when the target table has a large number of partitions. A new GUC option "enable_parallel_insert" is added, to allow insert in parallel-mode. The default is on. In addition to the GUC option, the user may want a mechanism to allow inserts in parallel-mode with finer granularity at table level. The new table option "parallel_insert_enabled" allows this. The default is true. Author: "Hou, Zhijie" Reviewed-by: Greg Nancarrow, Amit Langote, Takayuki Tsunakawa, Amit Kapila Discussion: https://postgr.es/m/CAA4eK1K-cW7svLC2D7DHoGHxdAdg3P37BLgebqBOC2ZLc9a6QQ%40mail.gmail.com Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com --- src/test/regress/expected/insert_parallel.out | 56 ++++++++++++++++++++++++++- src/test/regress/expected/sysviews.out | 3 +- src/test/regress/sql/insert_parallel.sql | 44 ++++++++++++++++++++- 3 files changed, 98 insertions(+), 5 deletions(-) (limited to 'src/test/regress') diff --git a/src/test/regress/expected/insert_parallel.out b/src/test/regress/expected/insert_parallel.out index 3599c21eba..164668e319 100644 --- a/src/test/regress/expected/insert_parallel.out +++ b/src/test/regress/expected/insert_parallel.out @@ -61,12 +61,45 @@ set max_parallel_workers_per_gather=4; create table para_insert_p1 ( unique1 int4 PRIMARY KEY, stringu1 name -); +) with (parallel_insert_enabled = off); create table para_insert_f1 ( unique1 int4 REFERENCES para_insert_p1(unique1), stringu1 name ); -- +-- Disable guc option enable_parallel_insert +-- +set enable_parallel_insert = off; +-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + QUERY PLAN +-------------------------- + Insert on para_insert_p1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Reset guc option enable_parallel_insert +-- +reset enable_parallel_insert; +-- +-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + QUERY PLAN +-------------------------- + Insert on para_insert_p1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table para_insert_p1 set (parallel_insert_enabled = on); +-- -- Test INSERT with underlying query. -- (should create plan with parallel SELECT, Gather parent node) -- @@ -362,9 +395,28 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data; -- -- Test INSERT into partition with underlying query. -- -create table parttable1 (a int, b name) partition by range (a); +create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off); create table parttable1_1 partition of parttable1 for values from (0) to (5000); create table parttable1_2 partition of parttable1 for values from (5000) to (10000); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=off +-- (should not create a parallel plan) +-- +explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; + QUERY PLAN +------------------------- + Insert on parttable1 + -> Seq Scan on tenk1 +(2 rows) + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table parttable1 set (parallel_insert_enabled = on); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=on +-- (should create a parallel plan) +-- explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; QUERY PLAN ---------------------------------------- diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 6d048e309c..a62bf5dc92 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -107,13 +107,14 @@ select name, setting from pg_settings where name like 'enable%'; enable_nestloop | on enable_parallel_append | on enable_parallel_hash | on + enable_parallel_insert | on enable_partition_pruning | on enable_partitionwise_aggregate | off enable_partitionwise_join | off enable_seqscan | on enable_sort | on enable_tidscan | on -(18 rows) +(19 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/sql/insert_parallel.sql b/src/test/regress/sql/insert_parallel.sql index 8eb4e9564e..171d8e5b84 100644 --- a/src/test/regress/sql/insert_parallel.sql +++ b/src/test/regress/sql/insert_parallel.sql @@ -77,13 +77,38 @@ set max_parallel_workers_per_gather=4; create table para_insert_p1 ( unique1 int4 PRIMARY KEY, stringu1 name -); +) with (parallel_insert_enabled = off); create table para_insert_f1 ( unique1 int4 REFERENCES para_insert_p1(unique1), stringu1 name ); +-- +-- Disable guc option enable_parallel_insert +-- +set enable_parallel_insert = off; + +-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + +-- +-- Reset guc option enable_parallel_insert +-- +reset enable_parallel_insert; + +-- +-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off. +-- (should create plan with serial INSERT + SELECT) +-- +explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table para_insert_p1 set (parallel_insert_enabled = on); -- -- Test INSERT with underlying query. @@ -208,10 +233,25 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data; -- -- Test INSERT into partition with underlying query. -- -create table parttable1 (a int, b name) partition by range (a); +create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off); create table parttable1_1 partition of parttable1 for values from (0) to (5000); create table parttable1_2 partition of parttable1 for values from (5000) to (10000); +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=off +-- (should not create a parallel plan) +-- +explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; + +-- +-- Enable reloption parallel_insert_enabled +-- +alter table parttable1 set (parallel_insert_enabled = on); + +-- +-- Test INSERT into partition when reloption.parallel_insert_enabled=on +-- (should create a parallel plan) +-- explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; insert into parttable1 select unique1,stringu1 from tenk1; select count(*) from parttable1_1; -- cgit v1.2.1