summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmit Kapila <akapila@postgresql.org>2021-03-18 07:25:27 +0530
committerAmit Kapila <akapila@postgresql.org>2021-03-18 07:25:27 +0530
commitc8f78b616167bf8e24bc5dc69112c37755ed3058 (patch)
tree71ba14ed3e7b3056581ef777f1675fba846ff64f
parent5f79580ad69f6e696365bdc63bc265f45bd77211 (diff)
downloadpostgresql-c8f78b616167bf8e24bc5dc69112c37755ed3058.tar.gz
Add a new GUC and a reloption to enable inserts in parallel-mode.
Commit 05c8482f7f added the implementation of parallel SELECT for "INSERT INTO ... SELECT ..." which may incur non-negligible overhead in the additional parallel-safety checks that it performs, even when, in the end, those checks determine that parallelism can't be used. This is normally only ever a problem in the case of when the target table has a large number of partitions. A new GUC option "enable_parallel_insert" is added, to allow insert in parallel-mode. The default is on. In addition to the GUC option, the user may want a mechanism to allow inserts in parallel-mode with finer granularity at table level. The new table option "parallel_insert_enabled" allows this. The default is true. Author: "Hou, Zhijie" Reviewed-by: Greg Nancarrow, Amit Langote, Takayuki Tsunakawa, Amit Kapila Discussion: https://postgr.es/m/CAA4eK1K-cW7svLC2D7DHoGHxdAdg3P37BLgebqBOC2ZLc9a6QQ%40mail.gmail.com Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com
-rw-r--r--doc/src/sgml/config.sgml23
-rw-r--r--doc/src/sgml/ref/alter_table.sgml3
-rw-r--r--doc/src/sgml/ref/create_table.sgml26
-rw-r--r--src/backend/access/common/reloptions.c25
-rw-r--r--src/backend/optimizer/path/costsize.c2
-rw-r--r--src/backend/optimizer/util/clauses.c34
-rw-r--r--src/backend/utils/misc/guc.c10
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample1
-rw-r--r--src/bin/psql/tab-complete.c1
-rw-r--r--src/include/optimizer/cost.h1
-rw-r--r--src/include/utils/rel.h25
-rw-r--r--src/test/regress/expected/insert_parallel.out56
-rw-r--r--src/test/regress/expected/sysviews.out3
-rw-r--r--src/test/regress/sql/insert_parallel.sql44
-rw-r--r--src/tools/pgindent/typedefs.list1
15 files changed, 240 insertions, 15 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e551b9f6d..8603cf3f94 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5072,6 +5072,29 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
+ <varlistentry id="guc-enable-parallel-insert" xreflabel="enable_parallel_insert">
+ <term><varname>enable_parallel_insert</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>enable_parallel_insert</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Enables or disables the query planner's use of parallel plans for
+ <command>INSERT</command> commands. The default is <literal>on</literal>.
+ When enabled, the planner performs additional parallel-safety checks
+ on the target table's attributes and indexes, in order to determine
+ if it's safe to use a parallel plan for <command>INSERT</command>. In
+ cases such as when the target table has a large number of partitions,
+ and particularly also when that table uses something parallel-unsafe
+ that prevents parallelism, the overhead of these checks may become
+ prohibitively high. To address this potential overhead in these cases,
+ this option can be used to disable the use of parallel plans for
+ <command>INSERT</command>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
<sect2 id="runtime-config-query-constants">
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index c25ef5abd6..f82dce4a59 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -722,7 +722,8 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
<para>
<literal>SHARE UPDATE EXCLUSIVE</literal> lock will be taken for
fillfactor, toast and autovacuum storage parameters, as well as the
- planner parameter <varname>parallel_workers</varname>.
+ planner parameter <varname>parallel_workers</varname> and
+ <varname>parallel_insert_enabled</varname>.
</para>
</listitem>
</varlistentry>
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 71703da85a..ff1b642722 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -1409,6 +1409,32 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
</listitem>
</varlistentry>
+ <varlistentry id="reloption-parallel-insert-enabled" xreflabel="parallel_insert_enabled">
+ <term><literal>parallel_insert_enabled</literal> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>parallel_insert_enabled</varname> storage parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Enables or disables the query planner's use of parallel insert for
+ this table. When enabled (and provided that
+ <xref linkend="guc-enable-parallel-insert"/> is also <literal>true</literal>),
+ the planner performs additional parallel-safety checks on the table's
+ attributes and indexes, in order to determine if it's safe to use a
+ parallel plan for <command>INSERT</command>. The default is
+ <literal>true</literal>. In cases such as when the table has a large
+ number of partitions, and particularly also when that table uses a
+ parallel-unsafe feature that prevents parallelism, the overhead of these
+ checks may become prohibitively high. To address this potential overhead
+ in these cases, this option can be used to disable the use of parallel
+ insert for this table. Note that if the target table of the parallel
+ insert is partitioned, the <literal>parallel_insert_enabled</literal>
+ option values of the partitions are ignored.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="reloption-autovacuum-enabled" xreflabel="autovacuum_enabled">
<term><literal>autovacuum_enabled</literal>, <literal>toast.autovacuum_enabled</literal> (<type>boolean</type>)
<indexterm>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index d897bbec2b..5a0ae99750 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -168,6 +168,15 @@ static relopt_bool boolRelOpts[] =
},
true
},
+ {
+ {
+ "parallel_insert_enabled",
+ "Enables \"parallel insert\" feature for this table",
+ RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
+ ShareUpdateExclusiveLock
+ },
+ true
+ },
/* list terminator */
{{NULL}}
};
@@ -1859,7 +1868,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{"vacuum_index_cleanup", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, vacuum_index_cleanup)},
{"vacuum_truncate", RELOPT_TYPE_BOOL,
- offsetof(StdRdOptions, vacuum_truncate)}
+ offsetof(StdRdOptions, vacuum_truncate)},
+ {"parallel_insert_enabled", RELOPT_TYPE_BOOL,
+ offsetof(StdRdOptions, parallel_insert_enabled)}
};
return (bytea *) build_reloptions(reloptions, validate, kind,
@@ -1961,13 +1972,15 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate)
bytea *
partitioned_table_reloptions(Datum reloptions, bool validate)
{
- /*
- * There are no options for partitioned tables yet, but this is able to do
- * some validation.
- */
+ static const relopt_parse_elt tab[] = {
+ {"parallel_insert_enabled", RELOPT_TYPE_BOOL,
+ offsetof(PartitionedTableRdOptions, parallel_insert_enabled)}
+ };
+
return (bytea *) build_reloptions(reloptions, validate,
RELOPT_KIND_PARTITIONED,
- 0, NULL, 0);
+ sizeof(PartitionedTableRdOptions),
+ tab, lengthof(tab));
}
/*
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a25b674a19..c81e2cf244 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -129,6 +129,8 @@ Cost disable_cost = 1.0e10;
int max_parallel_workers_per_gather = 2;
+bool enable_parallel_insert = true;
+
bool enable_seqscan = true;
bool enable_indexscan = true;
bool enable_indexonlyscan = true;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index ade66a541c..c6be4f87c2 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -1265,8 +1265,10 @@ target_rel_chk_constr_max_parallel_hazard(Relation rel,
*
* It's not possible in the following cases:
*
- * 1) INSERT...ON CONFLICT...DO UPDATE
- * 2) INSERT without SELECT
+ * 1) enable_parallel_insert is off
+ * 2) INSERT...ON CONFLICT...DO UPDATE
+ * 3) INSERT without SELECT
+ * 4) the reloption parallel_insert_enabled is set to off
*
* (Note: we don't do in-depth parallel-safety checks here, we do only the
* cheaper tests that can quickly exclude obvious cases for which
@@ -1277,12 +1279,17 @@ bool
is_parallel_allowed_for_modify(Query *parse)
{
bool hasSubQuery;
+ bool parallel_enabled;
RangeTblEntry *rte;
ListCell *lc;
+ Relation rel;
if (!IsModifySupportedInParallelMode(parse->commandType))
return false;
+ if (!enable_parallel_insert)
+ return false;
+
/*
* UPDATE is not currently supported in parallel-mode, so prohibit
* INSERT...ON CONFLICT...DO UPDATE...
@@ -1313,7 +1320,28 @@ is_parallel_allowed_for_modify(Query *parse)
}
}
- return hasSubQuery;
+ if (!hasSubQuery)
+ return false;
+
+ /*
+ * Check if parallel_insert_enabled is enabled for the target table, if
+ * not, skip the safety checks.
+ *
+ * (Note: if the target table is partitioned, the parallel_insert_enabled
+ * option setting of the partitions are ignored).
+ */
+ rte = rt_fetch(parse->resultRelation, parse->rtable);
+
+ /*
+ * The target table is already locked by the caller (this is done in the
+ * parse/analyze phase), and remains locked until end-of-transaction.
+ */
+ rel = table_open(rte->relid, NoLock);
+
+ parallel_enabled = RelationGetParallelInsert(rel, true);
+ table_close(rel, NoLock);
+
+ return parallel_enabled;
}
/*****************************************************************************
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 855076b1fd..b263e3493b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1123,6 +1123,16 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
+ {"enable_parallel_insert", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables the planner's use of parallel plans for INSERT commands."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_parallel_insert,
+ true,
+ NULL, NULL, NULL
+ },
+ {
/* Not for general use --- used by SET SESSION AUTHORIZATION */
{"is_superuser", PGC_INTERNAL, UNGROUPED,
gettext_noop("Shows whether the current user is a superuser."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index f46c2dd7a8..6647f8fd6e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -371,6 +371,7 @@
#enable_partitionwise_aggregate = off
#enable_parallel_hash = on
#enable_partition_pruning = on
+#enable_parallel_insert = on
# - Planner Cost Constants -
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index ecdb8d752b..d3fb734f05 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1118,6 +1118,7 @@ static const char *const table_storage_parameters[] = {
"autovacuum_vacuum_threshold",
"fillfactor",
"log_autovacuum_min_duration",
+ "parallel_insert_enabled",
"parallel_workers",
"toast.autovacuum_enabled",
"toast.autovacuum_freeze_max_age",
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 1be93be098..22e6db96b6 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -47,6 +47,7 @@ typedef enum
/* parameter variables and flags (see also optimizer.h) */
extern PGDLLIMPORT Cost disable_cost;
extern PGDLLIMPORT int max_parallel_workers_per_gather;
+extern PGDLLIMPORT bool enable_parallel_insert;
extern PGDLLIMPORT bool enable_seqscan;
extern PGDLLIMPORT bool enable_indexscan;
extern PGDLLIMPORT bool enable_indexonlyscan;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 10b63982c0..5375a37dd1 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -306,6 +306,8 @@ typedef struct StdRdOptions
int parallel_workers; /* max number of parallel workers */
bool vacuum_index_cleanup; /* enables index vacuuming and cleanup */
bool vacuum_truncate; /* enables vacuum to truncate a relation */
+ bool parallel_insert_enabled; /* enables planner's use of
+ * parallel insert */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
@@ -424,6 +426,29 @@ typedef struct ViewOptions
VIEW_OPTION_CHECK_OPTION_CASCADED)
/*
+ * PartitionedTableRdOptions
+ * Contents of rd_options for partitioned tables
+ */
+typedef struct PartitionedTableRdOptions
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ bool parallel_insert_enabled; /* enables planner's use of
+ * parallel insert */
+} PartitionedTableRdOptions;
+
+/*
+ * RelationGetParallelInsert
+ * Returns the relation's parallel_insert_enabled reloption setting.
+ * Note multiple eval of argument!
+ */
+#define RelationGetParallelInsert(relation, defaultpd) \
+ ((relation)->rd_options ? \
+ (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? \
+ ((PartitionedTableRdOptions *) (relation)->rd_options)->parallel_insert_enabled : \
+ ((StdRdOptions *) (relation)->rd_options)->parallel_insert_enabled) : \
+ (defaultpd))
+
+/*
* RelationIsValid
* True iff relation descriptor is valid.
*/
diff --git a/src/test/regress/expected/insert_parallel.out b/src/test/regress/expected/insert_parallel.out
index 3599c21eba..164668e319 100644
--- a/src/test/regress/expected/insert_parallel.out
+++ b/src/test/regress/expected/insert_parallel.out
@@ -61,12 +61,45 @@ set max_parallel_workers_per_gather=4;
create table para_insert_p1 (
unique1 int4 PRIMARY KEY,
stringu1 name
-);
+) with (parallel_insert_enabled = off);
create table para_insert_f1 (
unique1 int4 REFERENCES para_insert_p1(unique1),
stringu1 name
);
--
+-- Disable guc option enable_parallel_insert
+--
+set enable_parallel_insert = off;
+-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off.
+-- (should create plan with serial INSERT + SELECT)
+--
+explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
+ QUERY PLAN
+--------------------------
+ Insert on para_insert_p1
+ -> Seq Scan on tenk1
+(2 rows)
+
+--
+-- Reset guc option enable_parallel_insert
+--
+reset enable_parallel_insert;
+--
+-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off.
+-- (should create plan with serial INSERT + SELECT)
+--
+explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
+ QUERY PLAN
+--------------------------
+ Insert on para_insert_p1
+ -> Seq Scan on tenk1
+(2 rows)
+
+--
+-- Enable reloption parallel_insert_enabled
+--
+alter table para_insert_p1 set (parallel_insert_enabled = on);
+--
-- Test INSERT with underlying query.
-- (should create plan with parallel SELECT, Gather parent node)
--
@@ -362,9 +395,28 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data;
--
-- Test INSERT into partition with underlying query.
--
-create table parttable1 (a int, b name) partition by range (a);
+create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off);
create table parttable1_1 partition of parttable1 for values from (0) to (5000);
create table parttable1_2 partition of parttable1 for values from (5000) to (10000);
+--
+-- Test INSERT into partition when reloption.parallel_insert_enabled=off
+-- (should not create a parallel plan)
+--
+explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
+ QUERY PLAN
+-------------------------
+ Insert on parttable1
+ -> Seq Scan on tenk1
+(2 rows)
+
+--
+-- Enable reloption parallel_insert_enabled
+--
+alter table parttable1 set (parallel_insert_enabled = on);
+--
+-- Test INSERT into partition when reloption.parallel_insert_enabled=on
+-- (should create a parallel plan)
+--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
QUERY PLAN
----------------------------------------
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 6d048e309c..a62bf5dc92 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -107,13 +107,14 @@ select name, setting from pg_settings where name like 'enable%';
enable_nestloop | on
enable_parallel_append | on
enable_parallel_hash | on
+ enable_parallel_insert | on
enable_partition_pruning | on
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(18 rows)
+(19 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
diff --git a/src/test/regress/sql/insert_parallel.sql b/src/test/regress/sql/insert_parallel.sql
index 8eb4e9564e..171d8e5b84 100644
--- a/src/test/regress/sql/insert_parallel.sql
+++ b/src/test/regress/sql/insert_parallel.sql
@@ -77,13 +77,38 @@ set max_parallel_workers_per_gather=4;
create table para_insert_p1 (
unique1 int4 PRIMARY KEY,
stringu1 name
-);
+) with (parallel_insert_enabled = off);
create table para_insert_f1 (
unique1 int4 REFERENCES para_insert_p1(unique1),
stringu1 name
);
+--
+-- Disable guc option enable_parallel_insert
+--
+set enable_parallel_insert = off;
+
+-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off.
+-- (should create plan with serial INSERT + SELECT)
+--
+explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
+
+--
+-- Reset guc option enable_parallel_insert
+--
+reset enable_parallel_insert;
+
+--
+-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off.
+-- (should create plan with serial INSERT + SELECT)
+--
+explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
+
+--
+-- Enable reloption parallel_insert_enabled
+--
+alter table para_insert_p1 set (parallel_insert_enabled = on);
--
-- Test INSERT with underlying query.
@@ -208,10 +233,25 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data;
--
-- Test INSERT into partition with underlying query.
--
-create table parttable1 (a int, b name) partition by range (a);
+create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off);
create table parttable1_1 partition of parttable1 for values from (0) to (5000);
create table parttable1_2 partition of parttable1 for values from (5000) to (10000);
+--
+-- Test INSERT into partition when reloption.parallel_insert_enabled=off
+-- (should not create a parallel plan)
+--
+explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
+
+--
+-- Enable reloption parallel_insert_enabled
+--
+alter table parttable1 set (parallel_insert_enabled = on);
+
+--
+-- Test INSERT into partition when reloption.parallel_insert_enabled=on
+-- (should create a parallel plan)
+--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
insert into parttable1 select unique1,stringu1 from tenk1;
select count(*) from parttable1_1;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9e6777e9d0..1d1d5d2f0e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1797,6 +1797,7 @@ PartitionSpec
PartitionTupleRouting
PartitionedRelPruneInfo
PartitionedRelPruningData
+PartitionedTableRdOptions
PartitionwiseAggregateType
PasswordType
Path