diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2023-03-10 13:35:00 +0100 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2023-03-10 13:35:43 +0100 |
commit | 0d21d4b9bc1f9da9dda29e5c4db0c6dd45408aaa (patch) | |
tree | 421b155ad9135a9d477832a559e6b20dfb8ba14e | |
parent | 6ad5793a491a2e70e5610988a13f31f43c8946ad (diff) | |
download | postgresql-0d21d4b9bc1f9da9dda29e5c4db0c6dd45408aaa.tar.gz |
Add standard collation UNICODE
This adds a new predefined collation named UNICODE, which sorts using the
Unicode Collation Algorithm with the Default Unicode Collation Element
Table, as specified by the SQL standard. The collation is only usable
when PostgreSQL is built with ICU support.
Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/1293e382-2093-a2bf-a397-c04e8f83d3c2@enterprisedb.com
-rw-r--r-- | doc/src/sgml/charset.sgml | 31 | ||||
-rw-r--r-- | src/bin/initdb/initdb.c | 10 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/test/regress/expected/collate.icu.utf8.out | 9 | ||||
-rw-r--r-- | src/test/regress/sql/collate.icu.utf8.sql | 1 |
5 files changed, 46 insertions, 7 deletions
```diff
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 3032392b80..12fabb7372 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -659,9 +659,34 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
     </para>
 
     <para>
-     Additionally, the SQL standard collation name <literal>ucs_basic</literal>
-     is available for encoding <literal>UTF8</literal>. It is equivalent
-     to <literal>C</literal> and sorts by Unicode code point.
+     Additionally, two SQL standard collation names are available:
+
+     <variablelist>
+      <varlistentry>
+       <term><literal>unicode</literal></term>
+       <listitem>
+        <para>
+         This collation sorts using the Unicode Collation Algorithm with the
+         Default Unicode Collation Element Table. It is available in all
+         encodings. ICU support is required to use this collation. (This
+         collation has the same behavior as the ICU root locale; see <xref
+         linkend="collation-managing-predefined-icu-und-x-icu"/>.)
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term><literal>ucs_basic</literal></term>
+       <listitem>
+        <para>
+         This collation sorts by Unicode code point. It is only available for
+         encoding <literal>UTF8</literal>. (This collation has the same
+         behavior as the libc locale specification <literal>C</literal> in
+         <literal>UTF8</literal> encoding.)
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
     </para>
    </sect3>
 
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index bf88cd2439..497f2205f0 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1493,10 +1493,14 @@ static void
 setup_collation(FILE *cmdfd)
 {
 	/*
-	 * Add an SQL-standard name. We don't want to pin this, so it doesn't go
-	 * in pg_collation.h. But add it before reading system collations, so
-	 * that it wins if libc defines a locale named ucs_basic.
+	 * Add SQL-standard names. We don't want to pin these, so they don't go
+	 * in pg_collation.dat. But add them before reading system collations, so
+	 * that they win if libc defines a locale with the same name.
 	 */
+	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
+				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
+				  BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
+
 	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
 				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
 				  BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 24d866badf..5d1b957ed4 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /* yyyymmddN */
-#define CATALOG_VERSION_NO 202303081
+#define CATALOG_VERSION_NO 202303101
 
 #endif
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index f53857b46d..04c3aa14d2 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1151,6 +1151,15 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
  2 | äbc
 (4 rows)
 
+SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
+ a |  b  
+---+-----
+ 1 | abc
+ 4 | ABC
+ 2 | äbc
+ 3 | bbc
+(4 rows)
+
 -- test ICU collation customization
 -- test the attributes handled by icu_set_collation_attributes()
 CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index abf17a0767..4786d65a08 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -447,6 +447,7 @@ drop type textrange_en_us;
 
 -- standard collations
 SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
+SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
 
 -- test ICU collation customization
 
```