summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Eisentraut <peter@eisentraut.org> 2023-03-10 13:35:00 +0100
committer: Peter Eisentraut <peter@eisentraut.org> 2023-03-10 13:35:43 +0100
commit: 0d21d4b9bc1f9da9dda29e5c4db0c6dd45408aaa (patch)
tree: 421b155ad9135a9d477832a559e6b20dfb8ba14e
parent: 6ad5793a491a2e70e5610988a13f31f43c8946ad (diff)
download: postgresql-0d21d4b9bc1f9da9dda29e5c4db0c6dd45408aaa.tar.gz
Add standard collation UNICODE

This adds a new predefined collation named UNICODE, which sorts by the
default Unicode collation algorithm specifications, per SQL standard.

This only works if ICU support is built.

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/1293e382-2093-a2bf-a397-c04e8f83d3c2@enterprisedb.com

-rw-r--r-- doc/src/sgml/charset.sgml | 31
-rw-r--r-- src/bin/initdb/initdb.c | 10
-rw-r--r-- src/include/catalog/catversion.h | 2
-rw-r--r-- src/test/regress/expected/collate.icu.utf8.out | 9
-rw-r--r-- src/test/regress/sql/collate.icu.utf8.sql | 1
5 files changed, 46 insertions, 7 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 3032392b80..12fabb7372 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -659,9 +659,34 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
</para>
<para>
- Additionally, the SQL standard collation name <literal>ucs_basic</literal>
- is available for encoding <literal>UTF8</literal>. It is equivalent
- to <literal>C</literal> and sorts by Unicode code point.
+ Additionally, two SQL standard collation names are available:
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>unicode</literal></term>
+ <listitem>
+ <para>
+ This collation sorts using the Unicode Collation Algorithm with the
+ Default Unicode Collation Element Table. It is available in all
+ encodings. ICU support is required to use this collation. (This
+ collation has the same behavior as the ICU root locale; see <xref
+ linkend="collation-managing-predefined-icu-und-x-icu"/>.)
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>ucs_basic</literal></term>
+ <listitem>
+ <para>
+ This collation sorts by Unicode code point. It is only available for
+ encoding <literal>UTF8</literal>. (This collation has the same
+ behavior as the libc locale specification <literal>C</literal> in
+ <literal>UTF8</literal> encoding.)
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
</para>
</sect3>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index bf88cd2439..497f2205f0 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1493,10 +1493,14 @@ static void
setup_collation(FILE *cmdfd)
{
/*
- * Add an SQL-standard name. We don't want to pin this, so it doesn't go
- * in pg_collation.h. But add it before reading system collations, so
- * that it wins if libc defines a locale named ucs_basic.
+ * Add SQL-standard names. We don't want to pin these, so they don't go
+ * in pg_collation.dat. But add them before reading system collations, so
+ * that they win if libc defines a locale with the same name.
*/
+ PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
+ "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
+ BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
+
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 24d866badf..5d1b957ed4 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202303081
+#define CATALOG_VERSION_NO 202303101
#endif
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index f53857b46d..04c3aa14d2 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1151,6 +1151,15 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
2 | äbc
(4 rows)
+SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
+ a | b
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
-- test ICU collation customization
-- test the attributes handled by icu_set_collation_attributes()
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index abf17a0767..4786d65a08 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -447,6 +447,7 @@ drop type textrange_en_us;
-- standard collations
SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
+SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
-- test ICU collation customization