diff options
author | Petr Pisar <ppisar@redhat.com> | 2012-10-03 11:10:21 +0200 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2012-10-03 11:22:09 +0200 |
commit | 67436786c110bbb565f0c1b96f1ca1d4586e1c8d (patch) | |
tree | b931a7343cffcc5c647a0a7dff22a696d1a77523 | |
parent | c1996dd3c98a81cf700ba11e1521ca3d964aeabe (diff) | |
download | grep-67436786c110bbb565f0c1b96f1ca1d4586e1c8d.tar.gz |
pcresearch: set UTF-8 flag correctly for UTF-8 locales

Otherwise, Unicode properties (\p{XXX}) do not work with characters
outside the 7-bit ASCII character set.

* src/pcresearch.c (Pcompile): Look for UTF-8 locales and set PCRE_UTF8
if one is found.
-rw-r--r-- | NEWS | 6 |
-rw-r--r-- | src/pcresearch.c | 8 |
2 files changed, 14 insertions, 0 deletions
```diff
@@ -2,6 +2,12 @@ GNU grep NEWS -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Bug fixes
+
+  While multi-byte mode is only supported by PCRE with UTF-8 locales,
+  grep did not activate it. This can cause failures to match multibyte
+  characters against some regular expressions, especially those including
+  the '.' or '\p' metacharacters.
 
 * Noteworthy changes in release 2.14 (2012-08-20) [stable]
 
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 2994e653..3539b580 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -25,6 +25,9 @@
 #elif HAVE_PCRE_PCRE_H
 # include <pcre/pcre.h>
 #endif
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
 
 #if HAVE_LIBPCRE
 /* Compiled internal form of a Perl regular expression. */
@@ -51,6 +54,11 @@ Pcompile (char const *pattern, size_t size)
   char const *p;
   char const *pnul;
 
+#if defined HAVE_LANGINFO_CODESET
+  if (!strcmp(nl_langinfo(CODESET), "UTF-8"))
+    flags |= PCRE_UTF8;
+#endif
+
   /* FIXME: Remove these restrictions. */
   if (memchr(pattern, '\n', size))
     error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
```