From ed87e1980706975e7aa412bee200087774c5ff22 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 22 Sep 2017 11:35:12 -0400 Subject: Mop-up for commit 85feb77aa09cda9ff3e12cf95c757c499dc25343. Adjust commentary in regc_pg_locale.c to remove mention of the possibility of not having <wctype.h> functions, since we no longer consider that. Eliminate duplicate code in wparser_def.c by generalizing the p_iswhat macro to take a parameter saying what to return for non-ASCII chars in C locale. (That's not really a consequence of the USE_WIDE_UPPER_LOWER-ectomy, but I noticed it while doing that.) --- src/backend/tsearch/wparser_def.c | 113 ++++++++++---------------------------- 1 file changed, 28 insertions(+), 85 deletions(-) (limited to 'src/backend/tsearch') diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index c118357336..8450e1c08e 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -427,94 +427,45 @@ TParserCopyClose(TParser *prs) * - if locale is C then we use pgwstr instead of wstr. 
*/ -#define p_iswhat(type) \ +#define p_iswhat(type, nonascii) \ + \ static int \ -p_is##type(TParser *prs) { \ - Assert( prs->state ); \ - if ( prs->usewide ) \ +p_is##type(TParser *prs) \ +{ \ + Assert(prs->state); \ + if (prs->usewide) \ { \ - if ( prs->pgwstr ) \ + if (prs->pgwstr) \ { \ unsigned int c = *(prs->pgwstr + prs->state->poschar); \ - if ( c > 0x7f ) \ - return 0; \ - return is##type( c ); \ + if (c > 0x7f) \ + return nonascii; \ + return is##type(c); \ } \ - return isw##type( *( prs->wstr + prs->state->poschar ) ); \ + return isw##type(*(prs->wstr + prs->state->poschar)); \ } \ - \ - return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \ -} \ + return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \ +} \ \ static int \ -p_isnot##type(TParser *prs) { \ +p_isnot##type(TParser *prs) \ +{ \ return !p_is##type(prs); \ } -static int -p_isalnum(TParser *prs) -{ - Assert(prs->state); - - if (prs->usewide) - { - if (prs->pgwstr) - { - unsigned int c = *(prs->pgwstr + prs->state->poschar); - - /* - * any non-ascii symbol with multibyte encoding with C-locale is - * an alpha character - */ - if (c > 0x7f) - return 1; - - return isalnum(c); - } - - return iswalnum(*(prs->wstr + prs->state->poschar)); - } - - return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte)); -} -static int -p_isnotalnum(TParser *prs) -{ - return !p_isalnum(prs); -} - -static int -p_isalpha(TParser *prs) -{ - Assert(prs->state); - - if (prs->usewide) - { - if (prs->pgwstr) - { - unsigned int c = *(prs->pgwstr + prs->state->poschar); - - /* - * any non-ascii symbol with multibyte encoding with C-locale is - * an alpha character - */ - if (c > 0x7f) - return 1; - - return isalpha(c); - } - - return iswalpha(*(prs->wstr + prs->state->poschar)); - } - - return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte)); -} - -static int -p_isnotalpha(TParser *prs) -{ - return !p_isalpha(prs); -} +/* + * In C locale with a multibyte encoding, any 
non-ASCII symbol is considered + * an alpha character, but not a member of other char classes. + */ +p_iswhat(alnum, 1) +p_iswhat(alpha, 1) +p_iswhat(digit, 0) +p_iswhat(lower, 0) +p_iswhat(print, 0) +p_iswhat(punct, 0) +p_iswhat(space, 0) +p_iswhat(upper, 0) +p_iswhat(xdigit, 0) /* p_iseq should be used only for ascii symbols */ @@ -525,14 +476,6 @@ p_iseq(TParser *prs, char c) return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0; } -p_iswhat(digit) -p_iswhat(lower) -p_iswhat(print) -p_iswhat(punct) -p_iswhat(space) -p_iswhat(upper) -p_iswhat(xdigit) - static int p_isEOF(TParser *prs) { -- cgit v1.2.1