summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeff Davis <jdavis@postgresql.org> 2023-05-08 10:34:51 -0700
committer: Jeff Davis <jdavis@postgresql.org> 2023-05-08 10:34:51 -0700
commit: f7faa9976cc0504c027a20ed66ceca9018041dd4 (patch)
tree: 8e3935fadc654b0b83984be96db95ed4a4cb36fe
parent: 8d525d7b9545884a3e0d79adcd61543f9ae2ae28 (diff)
download: postgresql-f7faa9976cc0504c027a20ed66ceca9018041dd4.tar.gz
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
The conversion was intended to be for convenience, but it's more likely to be confusing than useful.

The user can still directly specify 'en-US-u-va-posix' if desired.

Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
-rw-r--r-- src/backend/utils/adt/pg_locale.c | 19
-rw-r--r-- src/bin/initdb/initdb.c | 17
-rw-r--r-- src/test/regress/expected/collate.icu.utf8.out | 8
-rw-r--r-- src/test/regress/sql/collate.icu.utf8.sql | 4
4 files changed, 14 insertions, 34 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index f0b6567da1..51b4221a39 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2782,26 +2782,10 @@ icu_language_tag(const char *loc_str, int elevel)
{
#ifdef USE_ICU
UErrorCode status;
- char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
- status = U_ZERO_ERROR;
- uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
- if (U_FAILURE(status))
- {
- if (elevel > 0)
- ereport(elevel,
- (errmsg("could not get language from locale \"%s\": %s",
- loc_str, u_errorName(status))));
- return NULL;
- }
-
- /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
- if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
- return pstrdup("en-US-u-va-posix");
-
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2889,8 +2873,7 @@ icu_validate_locale(const char *loc_str)
/* check for special language name */
if (strcmp(lang, "") == 0 ||
- strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
- strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
found = true;
/* search for matching language within ICU */
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 2c208ead01..4086834458 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2238,24 +2238,10 @@ icu_language_tag(const char *loc_str)
{
#ifdef USE_ICU
UErrorCode status;
- char lang[ULOC_LANG_CAPACITY];
char *langtag;
size_t buflen = 32; /* arbitrary starting buffer size */
const bool strict = true;
- status = U_ZERO_ERROR;
- uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
- if (U_FAILURE(status))
- {
- pg_fatal("could not get language from locale \"%s\": %s",
- loc_str, u_errorName(status));
- return NULL;
- }
-
- /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
- if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
- return pstrdup("en-US-u-va-posix");
-
/*
* A BCP47 language tag doesn't have a clearly-defined upper limit
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2327,8 +2313,7 @@ icu_validate_locale(const char *loc_str)
/* check for special language name */
if (strcmp(lang, "") == 0 ||
- strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
- strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
found = true;
/* search for matching language within ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index b5a221b030..c8fcbeb450 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
CREATE SCHEMA test_schema;
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
do $$
BEGIN
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1034,17 +1035,24 @@ BEGIN
quote_literal(current_setting('lc_collate')) || ');';
END
$$;
+RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
ERROR: parameter "locale" must be specified
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
+CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
+ERROR: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
ERROR: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
SET icu_validation_level = WARNING;
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
WARNING: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
+CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
+WARNING: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
+WARNING: ICU locale "C" has unknown language "c"
+HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 85e26951b6..46df58d092 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
-- We need to do this this way to cope with varying names for encodings:
SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
do $$
BEGIN
@@ -373,13 +374,16 @@ BEGIN
END
$$;
+RESET icu_validation_level;
RESET client_min_messages;
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
+CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
SET icu_validation_level = WARNING;
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
+CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
RESET icu_validation_level;