From 455f948b0d03a556533a7e4a1a8abf45f0eb202e Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 8 May 2023 20:50:51 -0700 Subject: Revert "ICU: do not convert locale 'C' to 'en-US-u-va-posix'." This reverts commit f7faa9976cc0504c027a20ed66ceca9018041dd4. Discussion: https://postgr.es/m/483826.1683582475@sss.pgh.pa.us --- src/backend/utils/adt/pg_locale.c | 19 ++++++++++++++++++- src/bin/initdb/initdb.c | 17 ++++++++++++++++- src/test/regress/expected/collate.icu.utf8.out | 8 -------- src/test/regress/sql/collate.icu.utf8.sql | 4 ---- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 51b4221a39..f0b6567da1 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -2782,10 +2782,26 @@ icu_language_tag(const char *loc_str, int elevel) { #ifdef USE_ICU UErrorCode status; + char lang[ULOC_LANG_CAPACITY]; char *langtag; size_t buflen = 32; /* arbitrary starting buffer size */ const bool strict = true; + status = U_ZERO_ERROR; + uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status)) + { + if (elevel > 0) + ereport(elevel, + (errmsg("could not get language from locale \"%s\": %s", + loc_str, u_errorName(status)))); + return NULL; + } + + /* C/POSIX locales aren't handled by uloc_getLanguageTag() */ + if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0) + return pstrdup("en-US-u-va-posix"); + /* * A BCP47 language tag doesn't have a clearly-defined upper limit * (cf. RFC5646 section 4.4). Additionally, in older ICU versions, @@ -2873,7 +2889,8 @@ icu_validate_locale(const char *loc_str) /* check for special language name */ if (strcmp(lang, "") == 0 || - strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0) + strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 || + strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0) found = true; /* search for matching language within ICU */ diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 4086834458..2c208ead01 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2238,10 +2238,24 @@ icu_language_tag(const char *loc_str) { #ifdef USE_ICU UErrorCode status; + char lang[ULOC_LANG_CAPACITY]; char *langtag; size_t buflen = 32; /* arbitrary starting buffer size */ const bool strict = true; + status = U_ZERO_ERROR; + uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status)) + { + pg_fatal("could not get language from locale \"%s\": %s", + loc_str, u_errorName(status)); + return NULL; + } + + /* C/POSIX locales aren't handled by uloc_getLanguageTag() */ + if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0) + return pstrdup("en-US-u-va-posix"); + /* * A BCP47 language tag doesn't have a clearly-defined upper limit * (cf. RFC5646 section 4.4). Additionally, in older ICU versions, @@ -2313,7 +2327,8 @@ icu_validate_locale(const char *loc_str) /* check for special language name */ if (strcmp(lang, "") == 0 || - strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0) + strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 || + strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0) found = true; /* search for matching language within ICU */ diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index c8fcbeb450..b5a221b030 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1020,7 +1020,6 @@ CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; -- We need to do this this way to cope with varying names for encodings: SET client_min_messages TO WARNING; -SET icu_validation_level = disabled; do $$ BEGIN EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' || @@ -1035,24 +1034,17 @@ BEGIN quote_literal(current_setting('lc_collate')) || ');'; END $$; -RESET icu_validation_level; RESET client_min_messages; CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" ERROR: parameter "locale" must be specified CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense" HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. -CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails -ERROR: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails ERROR: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR SET icu_validation_level = WARNING; CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx; WARNING: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR -CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx; -WARNING: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR -WARNING: ICU locale "C" has unknown language "c" -HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx; WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense" HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 46df58d092..85e26951b6 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -358,7 +358,6 @@ CREATE SCHEMA test_schema; -- We need to do this this way to cope with varying names for encodings: SET client_min_messages TO WARNING; -SET icu_validation_level = disabled; do $$ BEGIN @@ -374,16 +373,13 @@ BEGIN END $$; -RESET icu_validation_level; RESET client_min_messages; CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails -CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails SET icu_validation_level = WARNING; CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx; -CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx; CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx; RESET icu_validation_level; -- cgit v1.2.1