diff options
author | Peter Eisentraut <peter_e@gmx.net> | 2011-02-08 23:04:18 +0200 |
---|---|---|
committer | Peter Eisentraut <peter_e@gmx.net> | 2011-02-08 23:04:18 +0200 |
commit | 414c5a2ea65cbd38d79ffdf9b1fde7cc75c134e0 (patch) | |
tree | 016efd0c7108f659ea4f3c52ea54d78e1e5449e1 /src/backend/utils | |
parent | 1703f0e8da2e8e3eccb6e12879c011ba106f8a62 (diff) | |
download | postgresql-414c5a2ea65cbd38d79ffdf9b1fde7cc75c134e0.tar.gz |
Per-column collation support
This adds collation support for columns and domains, a COLLATE clause
to override it per expression, and B-tree index support.
Peter Eisentraut
reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/arrayfuncs.c | 5 | ||||
-rw-r--r-- | src/backend/utils/adt/format_type.c | 25 | ||||
-rw-r--r-- | src/backend/utils/adt/formatting.c | 140 | ||||
-rw-r--r-- | src/backend/utils/adt/like.c | 16 | ||||
-rw-r--r-- | src/backend/utils/adt/oracle_compat.c | 9 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 219 | ||||
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 77 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 46 | ||||
-rw-r--r-- | src/backend/utils/adt/varchar.c | 21 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 34 | ||||
-rw-r--r-- | src/backend/utils/cache/lsyscache.c | 94 | ||||
-rw-r--r-- | src/backend/utils/cache/relcache.c | 35 | ||||
-rw-r--r-- | src/backend/utils/cache/syscache.c | 23 | ||||
-rw-r--r-- | src/backend/utils/errcodes.txt | 2 | ||||
-rw-r--r-- | src/backend/utils/fmgr/fmgr.c | 66 | ||||
-rw-r--r-- | src/backend/utils/fmgr/funcapi.c | 4 | ||||
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 10 | ||||
-rw-r--r-- | src/backend/utils/sort/tuplesort.c | 9 |
18 files changed, 704 insertions, 131 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 931c6953cb..4ac9830878 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3307,6 +3307,7 @@ array_cmp(FunctionCallInfo fcinfo) { ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0); ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1); + Oid collation = PG_GET_COLLATION(); int ndims1 = ARR_NDIM(array1); int ndims2 = ARR_NDIM(array2); int *dims1 = ARR_DIMS(array1); @@ -3341,7 +3342,8 @@ array_cmp(FunctionCallInfo fcinfo) */ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; if (typentry == NULL || - typentry->type_id != element_type) + typentry->type_id != element_type || + typentry->cmp_proc_finfo.fn_collation != collation) { typentry = lookup_type_cache(element_type, TYPECACHE_CMP_PROC_FINFO); @@ -3351,6 +3353,7 @@ array_cmp(FunctionCallInfo fcinfo) errmsg("could not identify a comparison function for type %s", format_type_be(element_type)))); fcinfo->flinfo->fn_extra = (void *) typentry; + typentry->cmp_proc_finfo.fn_collation = collation; } typlen = typentry->typlen; typbyval = typentry->typbyval; diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c index b56bb74bdc..f85e0bbd00 100644 --- a/src/backend/utils/adt/format_type.c +++ b/src/backend/utils/adt/format_type.c @@ -18,6 +18,7 @@ #include <ctype.h> #include "catalog/namespace.h" +#include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -28,7 +29,8 @@ #define MAX_INT32_LEN 11 static char *format_type_internal(Oid type_oid, int32 typemod, - bool typemod_given, bool allow_invalid); + bool typemod_given, bool allow_invalid, + Oid collation_oid); static char *printTypmod(const char *typname, int32 typmod, Oid typmodout); static char * psnprintf(size_t len, const char *fmt,...) 
@@ -67,6 +69,7 @@ format_type(PG_FUNCTION_ARGS) { Oid type_oid; int32 typemod; + Oid collation_oid; char *result; /* Since this function is not strict, we must test for null args */ @@ -74,13 +77,14 @@ format_type(PG_FUNCTION_ARGS) PG_RETURN_NULL(); type_oid = PG_GETARG_OID(0); + collation_oid = PG_ARGISNULL(2) ? InvalidOid : PG_GETARG_OID(2); if (PG_ARGISNULL(1)) - result = format_type_internal(type_oid, -1, false, true); + result = format_type_internal(type_oid, -1, false, true, collation_oid); else { typemod = PG_GETARG_INT32(1); - result = format_type_internal(type_oid, typemod, true, true); + result = format_type_internal(type_oid, typemod, true, true, collation_oid); } PG_RETURN_TEXT_P(cstring_to_text(result)); @@ -95,7 +99,7 @@ format_type(PG_FUNCTION_ARGS) char * format_type_be(Oid type_oid) { - return format_type_internal(type_oid, -1, false, false); + return format_type_internal(type_oid, -1, false, false, InvalidOid); } /* @@ -104,14 +108,15 @@ format_type_be(Oid type_oid) char * format_type_with_typemod(Oid type_oid, int32 typemod) { - return format_type_internal(type_oid, typemod, true, false); + return format_type_internal(type_oid, typemod, true, false, InvalidOid); } static char * format_type_internal(Oid type_oid, int32 typemod, - bool typemod_given, bool allow_invalid) + bool typemod_given, bool allow_invalid, + Oid collation_oid) { bool with_typemod = typemod_given && (typemod >= 0); HeapTuple tuple; @@ -317,6 +322,12 @@ format_type_internal(Oid type_oid, int32 typemod, ReleaseSysCache(tuple); + if (collation_oid && collation_oid != DEFAULT_COLLATION_OID) + { + char *collstr = generate_collation_name(collation_oid); + buf = psnprintf(strlen(buf) + 10 + strlen(collstr), "%s COLLATE %s", buf, collstr); + } + return buf; } @@ -420,7 +431,7 @@ oidvectortypes(PG_FUNCTION_ARGS) for (num = 0; num < numargs; num++) { char *typename = format_type_internal(oidArray->values[num], -1, - false, true); + false, true, InvalidOid); size_t slen = 
strlen(typename); if (left < (slen + 2)) diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 4855bac41d..f90d36d24c 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -82,6 +82,7 @@ #include <wctype.h> #endif +#include "catalog/pg_collation.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/date.h" @@ -953,7 +954,7 @@ static void parse_format(FormatNode *node, char *str, const KeyWord *kw, KeySuffix *suf, const int *index, int ver, NUMDesc *Num); static void DCH_to_char(FormatNode *node, bool is_interval, - TmToChar *in, char *out); + TmToChar *in, char *out, Oid collid); static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out); #ifdef DEBUG_TO_FROM_CHAR @@ -981,7 +982,7 @@ static char *get_last_relevant_decnum(char *num); static void NUM_numpart_from_char(NUMProc *Np, int id, int plen); static void NUM_numpart_to_char(NUMProc *Np, int id); static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, - int plen, int sign, bool is_to_char); + int plen, int sign, bool is_to_char, Oid collid); static DCHCacheEntry *DCH_cache_search(char *str); static DCHCacheEntry *DCH_cache_getnew(char *str); @@ -1470,15 +1471,19 @@ str_numth(char *dest, char *num, int type) * to this function. The result is a palloc'd, null-terminated string. 
*/ char * -str_tolower(const char *buff, size_t nbytes) +str_tolower(const char *buff, size_t nbytes, Oid collid) { char *result; + pg_locale_t mylocale = 0; if (!buff) return NULL; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) { wchar_t *workspace; size_t curr_char; @@ -1493,16 +1498,21 @@ str_tolower(const char *buff, size_t nbytes) /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - char2wchar(workspace, nbytes + 1, buff, nbytes); + char2wchar(workspace, nbytes + 1, buff, nbytes, collid); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); + else +#endif workspace[curr_char] = towlower(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ result_size = curr_char * pg_database_encoding_max_length() + 1; result = palloc(result_size); - wchar2char(result, workspace, result_size); + wchar2char(result, workspace, result_size, collid); pfree(workspace); } else @@ -1526,15 +1536,19 @@ str_tolower(const char *buff, size_t nbytes) * to this function. The result is a palloc'd, null-terminated string. 
*/ char * -str_toupper(const char *buff, size_t nbytes) +str_toupper(const char *buff, size_t nbytes, Oid collid) { char *result; + pg_locale_t mylocale = 0; if (!buff) return NULL; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) { wchar_t *workspace; size_t curr_char; @@ -1549,16 +1563,21 @@ str_toupper(const char *buff, size_t nbytes) /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - char2wchar(workspace, nbytes + 1, buff, nbytes); + char2wchar(workspace, nbytes + 1, buff, nbytes, collid); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) +#ifdef HAVE_LOCALE_T + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); + else +#endif workspace[curr_char] = towupper(workspace[curr_char]); /* Make result large enough; case change might change number of bytes */ result_size = curr_char * pg_database_encoding_max_length() + 1; result = palloc(result_size); - wchar2char(result, workspace, result_size); + wchar2char(result, workspace, result_size, collid); pfree(workspace); } else @@ -1582,16 +1601,20 @@ str_toupper(const char *buff, size_t nbytes) * to this function. The result is a palloc'd, null-terminated string. 
*/ char * -str_initcap(const char *buff, size_t nbytes) +str_initcap(const char *buff, size_t nbytes, Oid collid) { char *result; int wasalnum = false; + pg_locale_t mylocale = 0; if (!buff) return NULL; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) { wchar_t *workspace; size_t curr_char; @@ -1606,22 +1629,35 @@ str_initcap(const char *buff, size_t nbytes) /* Output workspace cannot have more codes than input bytes */ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - char2wchar(workspace, nbytes + 1, buff, nbytes); + char2wchar(workspace, nbytes + 1, buff, nbytes, collid); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); + wasalnum = iswalnum_l(workspace[curr_char], mylocale); + } else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); +#endif + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } } /* Make result large enough; case change might change number of bytes */ result_size = curr_char * pg_database_encoding_max_length() + 1; result = palloc(result_size); - wchar2char(result, workspace, result_size); + wchar2char(result, workspace, result_size, collid); pfree(workspace); } else @@ -1647,21 +1683,21 @@ str_initcap(const char *buff, size_t nbytes) /* convenience routines for when the input is null-terminated */ static char * -str_tolower_z(const char *buff) 
+str_tolower_z(const char *buff, Oid collid) { - return str_tolower(buff, strlen(buff)); + return str_tolower(buff, strlen(buff), collid); } static char * -str_toupper_z(const char *buff) +str_toupper_z(const char *buff, Oid collid) { - return str_toupper(buff, strlen(buff)); + return str_toupper(buff, strlen(buff), collid); } static char * -str_initcap_z(const char *buff) +str_initcap_z(const char *buff, Oid collid) { - return str_initcap(buff, strlen(buff)); + return str_initcap(buff, strlen(buff), collid); } @@ -2039,7 +2075,7 @@ from_char_seq_search(int *dest, char **src, char **array, int type, int max, * ---------- */ static void -DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) +DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid) { FormatNode *n; char *s; @@ -2151,7 +2187,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) INVALID_FOR_INTERVAL; if (tmtcTzn(in)) { - char *p = str_tolower_z(tmtcTzn(in)); + char *p = str_tolower_z(tmtcTzn(in), collid); strcpy(s, p); pfree(p); @@ -2195,10 +2231,10 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_toupper_z(localized_full_months[tm->tm_mon - 1], collid)); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, - str_toupper_z(months_full[tm->tm_mon - 1])); + str_toupper_z(months_full[tm->tm_mon - 1], collid)); s += strlen(s); break; case DCH_Month: @@ -2206,7 +2242,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_initcap_z(localized_full_months[tm->tm_mon - 1], collid)); else sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, months_full[tm->tm_mon - 1]); s += strlen(s); @@ -2216,7 +2252,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_tolower_z(localized_full_months[tm->tm_mon - 1], collid)); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); @@ -2229,9 +2265,9 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid)); else - strcpy(s, str_toupper_z(months[tm->tm_mon - 1])); + strcpy(s, str_toupper_z(months[tm->tm_mon - 1], collid)); s += strlen(s); break; case DCH_Mon: @@ -2239,7 +2275,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_initcap_z(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid)); else strcpy(s, months[tm->tm_mon - 1]); s += strlen(s); @@ -2249,7 +2285,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid)); else { strcpy(s, months[tm->tm_mon - 1]); @@ -2266,16 +2302,16 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_DAY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday])); + strcpy(s, str_toupper_z(localized_full_days[tm->tm_wday], collid)); else sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, - str_toupper_z(days[tm->tm_wday])); + str_toupper_z(days[tm->tm_wday], collid)); s += strlen(s); break; case DCH_Day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday])); + strcpy(s, str_initcap_z(localized_full_days[tm->tm_wday], collid)); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); s += strlen(s); @@ -2283,7 +2319,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday])); + strcpy(s, str_tolower_z(localized_full_days[tm->tm_wday], collid)); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); @@ -2294,15 +2330,15 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_DY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_toupper_z(localized_abbrev_days[tm->tm_wday], collid)); else - strcpy(s, str_toupper_z(days_short[tm->tm_wday])); + strcpy(s, str_toupper_z(days_short[tm->tm_wday], collid)); s += strlen(s); break; case DCH_Dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_initcap_z(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_initcap_z(localized_abbrev_days[tm->tm_wday], collid)); else strcpy(s, days_short[tm->tm_wday]); s += strlen(s); @@ -2310,7 +2346,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_tolower_z(localized_abbrev_days[tm->tm_wday], collid)); else { strcpy(s, days_short[tm->tm_wday]); @@ -2846,7 +2882,7 @@ DCH_cache_search(char *str) * for formatting. 
*/ static text * -datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval) +datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) { FormatNode *format; char *fmt_str, @@ -2912,7 +2948,7 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval) } /* The real work is here */ - DCH_to_char(format, is_interval, tmtc, result); + DCH_to_char(format, is_interval, tmtc, result, collid); if (!incache) pfree(format); @@ -2959,7 +2995,7 @@ timestamp_to_char(PG_FUNCTION_ARGS) tm->tm_wday = (thisdate + 1) % 7; tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; - if (!(res = datetime_to_char_body(&tmtc, fmt, false))) + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) PG_RETURN_NULL(); PG_RETURN_TEXT_P(res); @@ -2991,7 +3027,7 @@ timestamptz_to_char(PG_FUNCTION_ARGS) tm->tm_wday = (thisdate + 1) % 7; tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1; - if (!(res = datetime_to_char_body(&tmtc, fmt, false))) + if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION()))) PG_RETURN_NULL(); PG_RETURN_TEXT_P(res); @@ -3023,7 +3059,7 @@ interval_to_char(PG_FUNCTION_ARGS) /* wday is meaningless, yday approximates the total span in days */ tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday; - if (!(res = datetime_to_char_body(&tmtc, fmt, true))) + if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION()))) PG_RETURN_NULL(); PG_RETURN_TEXT_P(res); @@ -4123,7 +4159,7 @@ NUM_numpart_to_char(NUMProc *Np, int id) */ static char * NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, - int plen, int sign, bool is_to_char) + int plen, int sign, bool is_to_char, Oid collid) { FormatNode *n; NUMProc _Np, @@ -4403,12 +4439,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, case NUM_rn: if (IS_FILLMODE(Np->Num)) { - strcpy(Np->inout_p, str_tolower_z(Np->number_p)); + strcpy(Np->inout_p, 
str_tolower_z(Np->number_p, collid)); Np->inout_p += strlen(Np->inout_p) - 1; } else { - sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p)); + sprintf(Np->inout_p, "%15s", str_tolower_z(Np->number_p, collid)); Np->inout_p += strlen(Np->inout_p) - 1; } break; @@ -4541,7 +4577,7 @@ do { \ */ #define NUM_TOCHAR_finish \ do { \ - NUM_processor(format, &Num, VARDATA(result), numstr, plen, sign, true); \ + NUM_processor(format, &Num, VARDATA(result), numstr, plen, sign, true, PG_GET_COLLATION()); \ \ if (shouldFree) \ pfree(format); \ @@ -4583,7 +4619,7 @@ numeric_to_number(PG_FUNCTION_ARGS) numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1); NUM_processor(format, &Num, VARDATA(value), numstr, - VARSIZE(value) - VARHDRSZ, 0, false); + VARSIZE(value) - VARHDRSZ, 0, false, PG_GET_COLLATION()); scale = Num.post; precision = Max(0, Num.pre) + scale; diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 1e7a6f32ea..1edbe88b74 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -39,7 +39,7 @@ static int UTF8_MatchText(char *t, int tlen, char *p, int plen); static int SB_IMatchText(char *t, int tlen, char *p, int plen); static int GenericMatchText(char *s, int slen, char *p, int plen); -static int Generic_Text_IC_like(text *str, text *pat); +static int Generic_Text_IC_like(text *str, text *pat, Oid collation); /*-------------------- * Support routine for MatchText. 
Compares given multibyte streams @@ -133,7 +133,7 @@ GenericMatchText(char *s, int slen, char *p, int plen) } static inline int -Generic_Text_IC_like(text *str, text *pat) +Generic_Text_IC_like(text *str, text *pat, Oid collation) { char *s, *p; @@ -149,10 +149,10 @@ Generic_Text_IC_like(text *str, text *pat) if (pg_database_encoding_max_length() > 1) { /* lower's result is never packed, so OK to use old macros here */ - pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat))); + pat = DatumGetTextP(DirectFunctionCall1WithCollation(lower, collation, PointerGetDatum(pat))); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); - str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str))); + str = DatumGetTextP(DirectFunctionCall1WithCollation(lower, collation, PointerGetDatum(str))); s = VARDATA(str); slen = (VARSIZE(str) - VARHDRSZ); if (GetDatabaseEncoding() == PG_UTF8) @@ -314,7 +314,7 @@ nameiclike(PG_FUNCTION_ARGS) strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); - result = (Generic_Text_IC_like(strtext, pat) == LIKE_TRUE); + result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -329,7 +329,7 @@ nameicnlike(PG_FUNCTION_ARGS) strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); - result = (Generic_Text_IC_like(strtext, pat) != LIKE_TRUE); + result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -341,7 +341,7 @@ texticlike(PG_FUNCTION_ARGS) text *pat = PG_GETARG_TEXT_PP(1); bool result; - result = (Generic_Text_IC_like(str, pat) == LIKE_TRUE); + result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -353,7 +353,7 @@ texticnlike(PG_FUNCTION_ARGS) text *pat = PG_GETARG_TEXT_PP(1); bool result; - result = (Generic_Text_IC_like(str, pat) != LIKE_TRUE); + result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE); 
PG_RETURN_BOOL(result); } diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 65559dff58..4487b0a181 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -47,7 +47,8 @@ lower(PG_FUNCTION_ARGS) text *result; out_string = str_tolower(VARDATA_ANY(in_string), - VARSIZE_ANY_EXHDR(in_string)); + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); result = cstring_to_text(out_string); pfree(out_string); @@ -77,7 +78,8 @@ upper(PG_FUNCTION_ARGS) text *result; out_string = str_toupper(VARDATA_ANY(in_string), - VARSIZE_ANY_EXHDR(in_string)); + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); result = cstring_to_text(out_string); pfree(out_string); @@ -110,7 +112,8 @@ initcap(PG_FUNCTION_ARGS) text *result; out_string = str_initcap(VARDATA_ANY(in_string), - VARSIZE_ANY_EXHDR(in_string)); + VARSIZE_ANY_EXHDR(in_string), + PG_GET_COLLATION()); result = cstring_to_text(out_string); pfree(out_string); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index f76305a219..2b9b321b26 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -54,10 +54,13 @@ #include <locale.h> #include <time.h> +#include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/syscache.h" #ifdef WIN32 #include <shlwapi.h> @@ -100,6 +103,11 @@ static char lc_time_envbuf[LC_ENV_BUFSIZE]; static char *IsoLocaleName(const char *); /* MSVC specific */ #endif +static HTAB *locale_cness_cache = NULL; +#ifdef HAVE_LOCALE_T +static HTAB *locale_t_cache = NULL; +#endif + /* * pg_perm_setlocale @@ -305,16 +313,90 @@ locale_messages_assign(const char *value, bool doit, GucSource source) /* - * We'd like to cache whether LC_COLLATE is C (or POSIX), so we can - * optimize a few code paths in various places. 
+ * We'd like to cache whether LC_COLLATE or LC_CTYPE is C (or POSIX), + * so we can optimize a few code paths in various places. + * + * Note that some code relies on this not reporting false negatives + * (that is, saying it's not C when it is). For example, char2wchar() + * could fail if the locale is C, so str_tolower() shouldn't call it + * in that case. */ + +struct locale_cness_cache_entry +{ + Oid collid; + bool collate_is_c; + bool ctype_is_c; +}; + +static void +init_locale_cness_cache(void) +{ + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(struct locale_cness_cache_entry); + ctl.hash = oid_hash; + locale_cness_cache = hash_create("locale C-ness cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION); +} + +/* + * Handle caching of locale "C-ness" for nondefault collation objects. + * Relying on the system cache directly isn't fast enough. + */ +static bool +lookup_collation_cness(Oid collation, int category) +{ + struct locale_cness_cache_entry *cache_entry; + bool found; + HeapTuple tp; + char *localeptr; + + Assert(OidIsValid(collation)); + Assert(category == LC_COLLATE || category == LC_CTYPE); + + if (!locale_cness_cache) + init_locale_cness_cache(); + + cache_entry = hash_search(locale_cness_cache, &collation, HASH_ENTER, &found); + if (found) + { + if (category == LC_COLLATE) + return cache_entry->collate_is_c; + else + return cache_entry->ctype_is_c; + } + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collation); + + localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate); + cache_entry->collate_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0); + + localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype); + cache_entry->ctype_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0); + + ReleaseSysCache(tp); + + return category == 
LC_COLLATE ? cache_entry->collate_is_c : cache_entry->ctype_is_c; +} + + bool -lc_collate_is_c(void) +lc_collate_is_c(Oid collation) { /* Cache result so we only have to compute it once */ static int result = -1; char *localeptr; + if (!OidIsValid(collation)) + return false; + + if (collation != DEFAULT_COLLATION_OID) + return lookup_collation_cness(collation, LC_COLLATE); + if (result >= 0) return (bool) result; localeptr = setlocale(LC_COLLATE, NULL); @@ -331,17 +413,19 @@ lc_collate_is_c(void) } -/* - * We'd like to cache whether LC_CTYPE is C (or POSIX), so we can - * optimize a few code paths in various places. - */ bool -lc_ctype_is_c(void) +lc_ctype_is_c(Oid collation) { /* Cache result so we only have to compute it once */ static int result = -1; char *localeptr; + if (!OidIsValid(collation)) + return false; + + if (collation != DEFAULT_COLLATION_OID) + return lookup_collation_cness(collation, LC_CTYPE); + if (result >= 0) return (bool) result; localeptr = setlocale(LC_CTYPE, NULL); @@ -483,7 +567,7 @@ PGLC_localeconv(void) /* Get formatting information for numeric */ setlocale(LC_NUMERIC, locale_numeric); extlconv = localeconv(); - encoding = pg_get_encoding_from_locale(locale_numeric); + encoding = pg_get_encoding_from_locale(locale_numeric, true); decimal_point = db_encoding_strdup(encoding, extlconv->decimal_point); thousands_sep = db_encoding_strdup(encoding, extlconv->thousands_sep); @@ -497,7 +581,7 @@ PGLC_localeconv(void) /* Get formatting information for monetary */ setlocale(LC_MONETARY, locale_monetary); extlconv = localeconv(); - encoding = pg_get_encoding_from_locale(locale_monetary); + encoding = pg_get_encoding_from_locale(locale_monetary, true); /* * Must copy all values since restoring internal settings may overwrite @@ -758,3 +842,118 @@ IsoLocaleName(const char *winlocname) } #endif /* WIN32 && LC_MESSAGES */ + + +#ifdef HAVE_LOCALE_T +struct locale_t_cache_entry +{ + Oid collid; + locale_t locale; +}; + +static void 
+init_locale_t_cache(void) +{ + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(struct locale_t_cache_entry); + ctl.hash = oid_hash; + locale_t_cache = hash_create("locale_t cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION); +} +#endif /* HAVE_LOCALE_T */ + +/* + * Create a locale_t from a collation OID. Results are cached for the + * lifetime of the backend. Thus, do not free the result with + * freelocale(). + * + * As a special optimization, the default/database collation returns + * 0. Callers should then revert to the non-locale_t-enabled code + * path. In fact, they shouldn't call this function at all when they + * are dealing with the default locale. That can save quite a bit in + * hotspots. + * + * For simplicity, we always generate COLLATE + CTYPE even though we + * might only need one of them. Since this is called only once per + * session, it shouldn't cost much. + */ +pg_locale_t +pg_newlocale_from_collation(Oid collid) +{ +#ifdef HAVE_LOCALE_T + HeapTuple tp; + const char *collcollate; + const char *collctype; + locale_t result; + struct locale_t_cache_entry *cache_entry; + bool found; + + if (collid == DEFAULT_COLLATION_OID) + return (locale_t) 0; + + if (!OidIsValid(collid)) + elog(ERROR, "locale operation to be invoked, but no collation was derived"); + + if (!locale_t_cache) + init_locale_t_cache(); + + cache_entry = hash_search(locale_t_cache, &collid, HASH_ENTER, &found); + if (found) + return cache_entry->locale; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + + collcollate = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate); + collctype = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype); + + if (strcmp(collcollate, collctype) == 0) + { + result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL); + if (!result) + ereport(ERROR, + (errcode_for_file_access(), + 
errmsg("could not create locale \"%s\": %m", collcollate))); + } + else + { + locale_t loc1; + + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create locale \"%s\": %m", collcollate))); + result = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!result) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create locale \"%s\": %m", collctype))); + } + + ReleaseSysCache(tp); + + cache_entry->locale = result; + + return result; +#else /* not HAVE_LOCALE_T */ + /* + * For platforms that don't support locale_t, check that we are + * dealing with the default locale. It's unlikely that we'll get + * here, but it's possible if users are creating collations even + * though they are not supported, or they are mixing builds in odd + * ways. + */ + if (!OidIsValid(collid)) + elog(ERROR, "locale operation to be invoked, but no collation was derived"); + else if (collid != DEFAULT_COLLATION_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondefault collations are not supported on this platform"))); + + return 0; +#endif /* not HAVE_LOCALE_T */ +} diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index b8259febb8..cd64235438 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -23,6 +23,7 @@ #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_depend.h" #include "catalog/pg_language.h" @@ -233,7 +234,7 @@ static void get_from_clause_item(Node *jtnode, Query *query, deparse_context *context); static void get_from_clause_alias(Alias *alias, RangeTblEntry *rte, deparse_context *context); -static void get_from_clause_coldeflist(List *names, List *types, List *typmods, +static void get_from_clause_coldeflist(List *names, List *types, List *typmods, List 
*collations, deparse_context *context); static void get_opclass_name(Oid opclass, Oid actual_datatype, StringInfo buf); @@ -788,9 +789,11 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, Oid indrelid; int keyno; Oid keycoltype; + Datum indcollDatum; Datum indclassDatum; Datum indoptionDatum; bool isnull; + oidvector *indcollation; oidvector *indclass; int2vector *indoption; StringInfoData buf; @@ -808,11 +811,17 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, indrelid = idxrec->indrelid; Assert(indexrelid == idxrec->indexrelid); - /* Must get indclass and indoption the hard way */ + /* Must get indcollation, indclass, and indoption the hard way */ + indcollDatum = SysCacheGetAttr(INDEXRELID, ht_idx, + Anum_pg_index_indcollation, &isnull); + Assert(!isnull); + indcollation = (oidvector *) DatumGetPointer(indcollDatum); + indclassDatum = SysCacheGetAttr(INDEXRELID, ht_idx, Anum_pg_index_indclass, &isnull); Assert(!isnull); indclass = (oidvector *) DatumGetPointer(indclassDatum); + indoptionDatum = SysCacheGetAttr(INDEXRELID, ht_idx, Anum_pg_index_indoption, &isnull); Assert(!isnull); @@ -928,6 +937,13 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, if (!attrsOnly && (!colno || colno == keyno + 1)) { + Oid coll; + + /* Add collation, if not default */ + coll = indcollation->values[keyno]; + if (coll && coll != DEFAULT_COLLATION_OID && coll != get_attcollation(indrelid, attnum)) + appendStringInfo(&buf, " COLLATE %s", generate_collation_name((indcollation->values[keyno]))); + /* Add the operator class name, if not default */ get_opclass_name(indclass->values[keyno], keycoltype, &buf); @@ -5054,6 +5070,20 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_CollateClause: + { + CollateClause *collate = (CollateClause *) node; + Node *arg = (Node *) collate->arg; + + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, '('); + get_rule_expr_paren(arg, context, false, node); + appendStringInfo(buf, " COLLATE %s", 
generate_collation_name(collate->collOid)); + if (!PRETTY_PAREN(context)) + appendStringInfoChar(buf, ')'); + } + break; + case T_CoerceViaIO: { CoerceViaIO *iocoerce = (CoerceViaIO *) node; @@ -6345,6 +6375,7 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) get_from_clause_coldeflist(rte->eref->colnames, rte->funccoltypes, rte->funccoltypmods, + rte->funccolcollations, context); } else @@ -6543,35 +6574,42 @@ get_from_clause_alias(Alias *alias, RangeTblEntry *rte, * responsible for ensuring that an alias or AS is present before it. */ static void -get_from_clause_coldeflist(List *names, List *types, List *typmods, +get_from_clause_coldeflist(List *names, List *types, List *typmods, List *collations, deparse_context *context) { StringInfo buf = context->buf; ListCell *l1; ListCell *l2; ListCell *l3; + ListCell *l4; int i = 0; appendStringInfoChar(buf, '('); l2 = list_head(types); l3 = list_head(typmods); + l4 = list_head(collations); foreach(l1, names) { char *attname = strVal(lfirst(l1)); Oid atttypid; int32 atttypmod; + Oid attcollation; atttypid = lfirst_oid(l2); l2 = lnext(l2); atttypmod = lfirst_int(l3); l3 = lnext(l3); + attcollation = lfirst_oid(l4); + l4 = lnext(l4); if (i > 0) appendStringInfo(buf, ", "); appendStringInfo(buf, "%s %s", quote_identifier(attname), format_type_with_typemod(atttypid, atttypmod)); + if (attcollation && attcollation != DEFAULT_COLLATION_OID) + appendStringInfo(buf, " COLLATE %s", generate_collation_name(attcollation)); i++; } @@ -7039,6 +7077,39 @@ generate_operator_name(Oid operid, Oid arg1, Oid arg2) } /* + * generate_collation_name + * Compute the name to display for a collation specified by OID + * + * The result includes all necessary quoting and schema-prefixing. 
+ */ +char * +generate_collation_name(Oid collid) +{ + HeapTuple tp; + Form_pg_collation colltup; + char *collname; + char *nspname; + char *result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + collname = NameStr(colltup->collname); + + if (!CollationIsVisible(collid)) + nspname = get_namespace_name(colltup->collnamespace); + else + nspname = NULL; + + result = quote_qualified_identifier(nspname, collname); + + ReleaseSysCache(tp); + + return result; +} + +/* * Given a C string, produce a TEXT datum. * * We assume that the input was palloc'd and may be freed. diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7e3ff864c8..b3299b56d8 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -94,6 +94,7 @@ #include "access/gin.h" #include "access/sysattr.h" #include "catalog/index.h" +#include "catalog/pg_collation.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" @@ -144,7 +145,7 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, - Datum lobound, Datum hibound, Oid boundstypid, + Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid, double *scaledlobound, double *scaledhibound); static double convert_numeric_to_scalar(Datum value, Oid typid); static void convert_string_to_scalar(char *value, @@ -163,10 +164,10 @@ static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi); static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi); -static char *convert_string_datum(Datum value, Oid typid); +static char *convert_string_datum(Datum value, Oid typid, 
Oid collid); static double convert_timevalue_to_scalar(Datum value, Oid typid); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, Datum *min, Datum *max); + Oid sortop, Oid collation, Datum *min, Datum *max); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, @@ -513,6 +514,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); fmgr_info(get_opcode(operator), &opproc); + fmgr_info_collation(vardata->attcollation, &opproc); /* * If we have most-common-values info, add up the fractions of the MCV @@ -837,7 +839,7 @@ ineq_histogram_selectivity(PlannerInfo *root, */ if (convert_to_scalar(constval, consttype, &val, values[i - 1], values[i], - vardata->vartype, + vardata->vartype, vardata->attcollation, &low, &high)) { if (high <= low) @@ -1249,6 +1251,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) /* Try to use the histogram entries to get selectivity */ fmgr_info(get_opcode(operator), &opproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc); selec = histogram_selectivity(&vardata, &opproc, constval, true, 10, 1, &hist_size); @@ -2585,7 +2588,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS) */ void mergejoinscansel(PlannerInfo *root, Node *clause, - Oid opfamily, int strategy, bool nulls_first, + Oid opfamily, Oid collation, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend) { @@ -2754,20 +2757,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause, /* Try to get ranges of both inputs */ if (!isgt) { - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmin, &leftmax)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmin, &rightmax)) goto fail; /* no 
range available from stats */ } else { /* need to swap the max and min */ - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmax, &leftmin)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmax, &rightmin)) goto fail; /* no range available from stats */ } @@ -3368,7 +3371,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) */ static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, - Datum lobound, Datum hibound, Oid boundstypid, + Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid, double *scaledlobound, double *scaledhibound) { /* @@ -3421,9 +3424,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, case TEXTOID: case NAMEOID: { - char *valstr = convert_string_datum(value, valuetypid); - char *lostr = convert_string_datum(lobound, boundstypid); - char *histr = convert_string_datum(hibound, boundstypid); + char *valstr = convert_string_datum(value, valuetypid, boundscollid); + char *lostr = convert_string_datum(lobound, boundstypid, boundscollid); + char *histr = convert_string_datum(hibound, boundstypid, boundscollid); convert_string_to_scalar(valstr, scaledvalue, lostr, scaledlobound, @@ -3667,7 +3670,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi) * before continuing, so as to generate correct locale-specific results. 
*/ static char * -convert_string_datum(Datum value, Oid typid) +convert_string_datum(Datum value, Oid typid, Oid collid) { char *val; @@ -3700,7 +3703,7 @@ convert_string_datum(Datum value, Oid typid) return NULL; } - if (!lc_collate_is_c()) + if (!lc_collate_is_c(collid)) { char *xfrmstr; size_t xfrmlen; @@ -4099,6 +4102,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->rel = find_base_rel(root, var->varno); vardata->atttype = var->vartype; vardata->atttypmod = var->vartypmod; + vardata->attcollation = var->varcollid; vardata->isunique = has_unique_index(vardata->rel, var->varattno); rte = root->simple_rte_array[var->varno]; @@ -4184,6 +4188,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->var = node; vardata->atttype = exprType(node); vardata->atttypmod = exprTypmod(node); + vardata->attcollation = exprCollation(node); if (onerel) { @@ -4392,7 +4397,7 @@ get_variable_numdistinct(VariableStatData *vardata) * be "<" not ">", as only the former is likely to be found in pg_statistic. 
*/ static bool -get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, +get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max) { Datum tmin = 0; @@ -4477,6 +4482,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, FmgrInfo opproc; fmgr_info(get_opcode(sortop), &opproc); + fmgr_info_collation(collation, &opproc); for (i = 0; i < nvalues; i++) { @@ -5482,7 +5488,7 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc) { workstr = TextDatumGetCString(str_const->constvalue); len = strlen(workstr); - if (lc_collate_is_c() || len == 0) + if (lc_collate_is_c(ltproc->fn_collation) || len == 0) cmpstr = str_const->constvalue; else { @@ -5494,11 +5500,11 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc) char *best; best = "Z"; - if (varstr_cmp(best, 1, "z", 1) < 0) + if (varstr_cmp(best, 1, "z", 1, DEFAULT_COLLATION_OID) < 0) best = "z"; - if (varstr_cmp(best, 1, "y", 1) < 0) + if (varstr_cmp(best, 1, "y", 1, DEFAULT_COLLATION_OID) < 0) best = "y"; - if (varstr_cmp(best, 1, "9", 1) < 0) + if (varstr_cmp(best, 1, "9", 1, DEFAULT_COLLATION_OID) < 0) best = "9"; suffixchar = *best; } diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 08be966249..1c0ef921a7 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -737,7 +737,8 @@ bpcharlt(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -757,7 +758,8 @@ bpcharle(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); 
PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -777,7 +779,8 @@ bpchargt(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -797,7 +800,8 @@ bpcharge(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -817,7 +821,8 @@ bpcharcmp(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -837,7 +842,8 @@ bpchar_larger(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2); } @@ -854,7 +860,8 @@ bpchar_smaller(PG_FUNCTION_ARGS) len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2); + cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + PG_GET_COLLATION()); PG_RETURN_BPCHAR_P((cmp <= 0) ? 
arg1 : arg2); } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index e111d2650b..8a7a3cf45b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -18,6 +18,7 @@ #include <limits.h> #include "access/tuptoaster.h" +#include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "libpq/md5.h" #include "libpq/pqformat.h" @@ -55,7 +56,7 @@ typedef struct #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n)) #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x) -static int text_cmp(text *arg1, text *arg2); +static int text_cmp(text *arg1, text *arg2, Oid collid); static int32 text_length(Datum str); static int text_position(text *t1, text *t2); static void text_position_setup(text *t1, text *t2, TextPositionState *state); @@ -1274,7 +1275,7 @@ text_position_cleanup(TextPositionState *state) * whether arg1 is less than, equal to, or greater than arg2. */ int -varstr_cmp(char *arg1, int len1, char *arg2, int len2) +varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) { int result; @@ -1284,7 +1285,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2) * slower, so we optimize the case where LC_COLLATE is C. We also try to * optimize relatively-short strings by avoiding palloc/pfree overhead. 
*/ - if (lc_collate_is_c()) + if (lc_collate_is_c(collid)) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1298,6 +1299,10 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2) char a2buf[STACKBUFLEN]; char *a1p, *a2p; + pg_locale_t mylocale = 0; + + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); #ifdef WIN32 /* Win32 does not have UTF-8, so we need to map to UTF-16 */ @@ -1398,6 +1403,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2) memcpy(a2p, arg2, len2); a2p[len2] = '\0'; +#ifdef HAVE_LOCALE_T + if (mylocale) + result = strcoll_l(a1p, a2p, mylocale); + else +#endif result = strcoll(a1p, a2p); /* @@ -1424,7 +1434,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2) * Returns -1, 0 or 1 */ static int -text_cmp(text *arg1, text *arg2) +text_cmp(text *arg1, text *arg2, Oid collid) { char *a1p, *a2p; @@ -1437,7 +1447,7 @@ text_cmp(text *arg1, text *arg2) len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); - return varstr_cmp(a1p, len1, a2p, len2); + return varstr_cmp(a1p, len1, a2p, len2, collid); } /* @@ -1519,7 +1529,7 @@ text_lt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); bool result; - result = (text_cmp(arg1, arg2) < 0); + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1534,7 +1544,7 @@ text_le(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); bool result; - result = (text_cmp(arg1, arg2) <= 0); + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1549,7 +1559,7 @@ text_gt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); bool result; - result = (text_cmp(arg1, arg2) > 0); + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1564,7 +1574,7 @@ text_ge(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); bool result; - result = (text_cmp(arg1, 
arg2) >= 0); + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1579,7 +1589,7 @@ bttextcmp(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int32 result; - result = text_cmp(arg1, arg2); + result = text_cmp(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1595,7 +1605,7 @@ text_larger(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); text *result; - result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2); + result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2); PG_RETURN_TEXT_P(result); } @@ -1607,7 +1617,7 @@ text_smaller(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); text *result; - result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2); + result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2); PG_RETURN_TEXT_P(result); } diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 0a4144ba54..6af23429ad 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -20,6 +20,7 @@ #include "bootstrap/bootstrap.h" #include "catalog/pg_amop.h" #include "catalog/pg_amproc.h" +#include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" @@ -903,6 +904,33 @@ get_atttypmod(Oid relid, AttrNumber attnum) } /* + * get_attcollation + * + * Given the relation id and the attribute number, + * return the "attcollation" field from the attribute relation. 
+ */ +Oid +get_attcollation(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + Oid result; + + result = att_tup->attcollation; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + +/* * get_atttypetypmod * * A two-fer: given the relation id and the attribute number, @@ -931,6 +959,36 @@ get_atttypetypmod(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* ---------- COLLATION CACHE ---------- */ + +/* + * get_collation_name + * Returns the name of a given pg_collation entry. + * + * Returns a palloc'd copy of the string, or NULL if no such collation. + * + * NOTE: since collation name is not unique, be wary of code that uses this + * for anything except preparing error messages. + */ +char * +get_collation_name(Oid colloid) +{ + HeapTuple tp; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_collation colltup = (Form_pg_collation) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(colltup->collname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + /* ---------- CONSTRAINT CACHE ---------- */ /* @@ -2523,6 +2581,42 @@ get_typmodout(Oid typid) } #endif /* NOT_USED */ +/* + * get_typcollation + * + * Given the type OID, return the type's typcollation attribute. 
+ */ +Oid +get_typcollation(Oid typid) +{ + HeapTuple tp; + + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + Oid result; + + result = typtup->typcollation; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} + + +/* + * type_is_collatable + * + * Return whether the type cares about collations + */ +bool +type_is_collatable(Oid typid) +{ + return OidIsValid(get_typcollation(typid)); +} + /* ---------- STATISTICS CACHE ---------- */ diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 3b40acf4df..90464fd066 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -976,9 +976,11 @@ RelationInitIndexAccessInfo(Relation relation) { HeapTuple tuple; Form_pg_am aform; + Datum indcollDatum; Datum indclassDatum; Datum indoptionDatum; bool isnull; + oidvector *indcoll; oidvector *indclass; int2vector *indoption; MemoryContext indexcxt; @@ -1061,10 +1063,26 @@ RelationInitIndexAccessInfo(Relation relation) relation->rd_supportinfo = NULL; } + relation->rd_indcollation = (Oid *) + MemoryContextAllocZero(indexcxt, natts * sizeof(Oid)); + relation->rd_indoption = (int16 *) MemoryContextAllocZero(indexcxt, natts * sizeof(int16)); /* + * indcollation cannot be referenced directly through the C struct, because it + * comes after the variable-width indkey field. Must extract the datum + * the hard way... + */ + indcollDatum = fastgetattr(relation->rd_indextuple, + Anum_pg_index_indcollation, + GetPgIndexDescriptor(), + &isnull); + Assert(!isnull); + indcoll = (oidvector *) DatumGetPointer(indcollDatum); + memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid)); + + /* * indclass cannot be referenced directly through the C struct, because it * comes after the variable-width indkey field. Must extract the datum * the hard way... 
@@ -3988,6 +4006,7 @@ load_relcache_init_file(bool shared) RegProcedure *support; int nsupport; int16 *indoption; + Oid *indcollation; /* Count nailed indexes to ensure we have 'em all */ if (rel->rd_isnailed) @@ -4054,6 +4073,16 @@ load_relcache_init_file(bool shared) rel->rd_support = support; + /* next, read the vector of collation OIDs */ + if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) + goto read_failed; + + indcollation = (Oid *) MemoryContextAlloc(indexcxt, len); + if (fread(indcollation, 1, len, fp) != len) + goto read_failed; + + rel->rd_indcollation = indcollation; + /* finally, read the vector of indoption values */ if (fread(&len, 1, sizeof(len), fp) != sizeof(len)) goto read_failed; @@ -4087,6 +4116,7 @@ load_relcache_init_file(bool shared) Assert(rel->rd_support == NULL); Assert(rel->rd_supportinfo == NULL); Assert(rel->rd_indoption == NULL); + Assert(rel->rd_indcollation == NULL); } /* @@ -4305,6 +4335,11 @@ write_relcache_init_file(bool shared) relform->relnatts * (am->amsupport * sizeof(RegProcedure)), fp); + /* next, write the vector of collation OIDs */ + write_item(rel->rd_indcollation, + relform->relnatts * sizeof(Oid), + fp); + /* finally, write the vector of indoption values */ write_item(rel->rd_indoption, relform->relnatts * sizeof(int16), diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 191953b972..715341f842 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -28,6 +28,7 @@ #include "catalog/pg_auth_members.h" #include "catalog/pg_authid.h" #include "catalog/pg_cast.h" +#include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_conversion.h" #include "catalog/pg_database.h" @@ -267,6 +268,28 @@ static const struct cachedesc cacheinfo[] = { }, 64 }, + {CollationRelationId, /* COLLNAMEENCNSP */ + CollationNameEncNspIndexId, + 3, + { + Anum_pg_collation_collname, + Anum_pg_collation_collencoding, + 
Anum_pg_collation_collnamespace, + 0 + }, + 256 + }, + {CollationRelationId, /* COLLOID */ + CollationOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 256 + }, {ConversionRelationId, /* CONDEFAULT */ ConversionDefaultIndexId, 4, diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index 6f1d766859..0315f6b6f0 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -310,6 +310,8 @@ Section: Class 42 - Syntax Error or Access Rule Violation 42939 E ERRCODE_RESERVED_NAME reserved_name 42804 E ERRCODE_DATATYPE_MISMATCH datatype_mismatch 42P18 E ERRCODE_INDETERMINATE_DATATYPE indeterminate_datatype +42P21 E ERRCODE_COLLATION_MISMATCH collation_mismatch +42P22 E ERRCODE_INDETERMINATE_COLLATION indeterminate_collation 42809 E ERRCODE_WRONG_OBJECT_TYPE wrong_object_type # Note: for ERRCODE purposes, we divide namable objects into these categories: diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 54d50e9637..d05e4d2dd8 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -192,6 +192,7 @@ fmgr_info_cxt_security(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt, * elogs. */ finfo->fn_oid = InvalidOid; + finfo->fn_collation = InvalidOid; finfo->fn_extra = NULL; finfo->fn_mcxt = mcxt; finfo->fn_expr = NULL; /* caller may set this later */ @@ -420,6 +421,25 @@ fmgr_info_other_lang(Oid functionId, FmgrInfo *finfo, HeapTuple procedureTuple) } /* + * Initialize the fn_collation field + */ +void +fmgr_info_collation(Oid collationId, FmgrInfo *finfo) +{ + finfo->fn_collation = collationId; +} + +/* + * Initialize the fn_expr field and set the collation based on it + */ +void +fmgr_info_expr(Node *expr, FmgrInfo *finfo) +{ + finfo->fn_expr = expr; + finfo->fn_collation = exprCollation(expr); +} + +/* * Fetch and validate the information record for the given external function. 
* The function is specified by a handle for the containing library * (obtained from load_external_function) as well as the function name. @@ -1273,6 +1293,52 @@ DirectFunctionCall9(PGFunction func, Datum arg1, Datum arg2, return result; } +Datum +DirectFunctionCall1WithCollation(PGFunction func, Oid collation, Datum arg1) +{ + FunctionCallInfoData fcinfo; + FmgrInfo flinfo; + Datum result; + + InitFunctionCallInfoData(fcinfo, &flinfo, 1, NULL, NULL); + fcinfo.flinfo->fn_collation = collation; + + fcinfo.arg[0] = arg1; + fcinfo.argnull[0] = false; + + result = (*func) (&fcinfo); + + /* Check for null result, since caller is clearly not expecting one */ + if (fcinfo.isnull) + elog(ERROR, "function %p returned NULL", (void *) func); + + return result; +} + +Datum +DirectFunctionCall2WithCollation(PGFunction func, Oid collation, Datum arg1, Datum arg2) +{ + FunctionCallInfoData fcinfo; + FmgrInfo flinfo; + Datum result; + + InitFunctionCallInfoData(fcinfo, &flinfo, 2, NULL, NULL); + fcinfo.flinfo->fn_collation = collation; + + fcinfo.arg[0] = arg1; + fcinfo.arg[1] = arg2; + fcinfo.argnull[0] = false; + fcinfo.argnull[1] = false; + + result = (*func) (&fcinfo); + + /* Check for null result, since caller is clearly not expecting one */ + if (fcinfo.isnull) + elog(ERROR, "function %p returned NULL", (void *) func); + + return result; +} + /* * These are for invocation of a previously-looked-up function with a diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c index e32c716392..321b4e7f8f 100644 --- a/src/backend/utils/fmgr/funcapi.c +++ b/src/backend/utils/fmgr/funcapi.c @@ -468,7 +468,6 @@ resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args, /* If nothing found, parser messed up */ if (!OidIsValid(anyelement_type) && !OidIsValid(anyarray_type)) return false; - /* If needed, deduce one polymorphic type from the other */ if (have_anyelement_result && !OidIsValid(anyelement_type)) anyelement_type = 
resolve_generic_type(ANYELEMENTOID, @@ -511,6 +510,9 @@ resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args, default: break; } + /* Set collation based on actual argument types */ + TupleDescInitEntryCollation(tupdesc, i + 1, + exprCollation(call_expr)); } return true; diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index a04181286a..5ee74f747d 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -629,7 +629,7 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ * zero-terminated. The output will be zero-terminated iff there is room. */ size_t -wchar2char(char *to, const wchar_t *from, size_t tolen) +wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation) { size_t result; @@ -660,7 +660,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen) else #endif /* WIN32 */ { - Assert(!lc_ctype_is_c()); + Assert(!lc_ctype_is_c(collation)); result = wcstombs(to, from, tolen); } return result; @@ -676,7 +676,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen) * The output will be zero-terminated iff there is room. 
*/ size_t -char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) +char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation) { size_t result; @@ -711,7 +711,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) /* mbstowcs requires ending '\0' */ char *str = pnstrdup(from, fromlen); - Assert(!lc_ctype_is_c()); + Assert(!lc_ctype_is_c(collation)); result = mbstowcs(to, str, tolen); pfree(str); } @@ -983,7 +983,7 @@ GetPlatformEncoding(void) if (PlatformEncoding == NULL) { /* try to determine encoding of server's environment locale */ - int encoding = pg_get_encoding_from_locale(""); + int encoding = pg_get_encoding_from_locale("", true); if (encoding < 0) encoding = PG_SQL_ASCII; diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index d20a3b3739..f2449ea6b1 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -582,7 +582,7 @@ tuplesort_begin_common(int workMem, bool randomAccess) Tuplesortstate * tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, - Oid *sortOperators, bool *nullsFirstFlags, + Oid *sortOperators, Oid *collations, bool *nullsFirstFlags, int workMem, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); @@ -640,6 +640,10 @@ tuplesort_begin_heap(TupleDesc tupDesc, sortFunction, (Datum) 0); + if (collations) + ScanKeyEntryInitializeCollation(&state->scanKeys[i], + collations[i]); + /* However, we use btree's conventions for encoding directionality */ if (reverse) state->scanKeys[i].sk_flags |= SK_BT_DESC; @@ -791,7 +795,7 @@ tuplesort_begin_index_hash(Relation indexRel, Tuplesortstate * tuplesort_begin_datum(Oid datumType, - Oid sortOperator, bool nullsFirstFlag, + Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, bool randomAccess) { Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); @@ -832,6 +836,7 @@ 
tuplesort_begin_datum(Oid datumType, elog(ERROR, "operator %u is not a valid ordering operator", sortOperator); fmgr_info(sortFunction, &state->sortOpFn); + fmgr_info_collation(sortCollation, &state->sortOpFn); /* set ordering flags */ state->sortFnFlags = reverse ? SK_BT_DESC : 0; |