summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Petr Pisar <ppisar@redhat.com> 2012-10-03 11:10:21 +0200
committer: Paolo Bonzini <bonzini@gnu.org> 2012-10-03 11:22:09 +0200
commit: 67436786c110bbb565f0c1b96f1ca1d4586e1c8d (patch)
tree: b931a7343cffcc5c647a0a7dff22a696d1a77523
parent: c1996dd3c98a81cf700ba11e1521ca3d964aeabe (diff)
download: grep-67436786c110bbb565f0c1b96f1ca1d4586e1c8d.tar.gz
pcresearch: set UTF-8 flag correctly for UTF-8 locales
Otherwise, Unicode properties (\p{XXX}) do not work with characters outside the 7-bit ASCII character set.

* src/pcresearch.c (Pcompile): Look for UTF-8 locales and set PCRE_UTF8 if one is found.
-rw-r--r-- NEWS 6
-rw-r--r-- src/pcresearch.c 8
2 files changed, 14 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 9309f622..bc669b98 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@ GNU grep NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** Bug fixes
+
+ PCRE supports multibyte mode only in UTF-8 locales, but grep did not
+ activate it even there. This could cause failures to match multibyte
+ characters against some regular expressions, especially those including
+ the '.' or '\p' metacharacters.
* Noteworthy changes in release 2.14 (2012-08-20) [stable]
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 2994e653..3539b580 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -25,6 +25,9 @@
#elif HAVE_PCRE_PCRE_H
# include <pcre/pcre.h>
#endif
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
#if HAVE_LIBPCRE
/* Compiled internal form of a Perl regular expression. */
@@ -51,6 +54,11 @@ Pcompile (char const *pattern, size_t size)
char const *p;
char const *pnul;
+#if defined HAVE_LANGINFO_CODESET
+ if (!strcmp(nl_langinfo(CODESET), "UTF-8"))
+ flags |= PCRE_UTF8;
+#endif
+
/* FIXME: Remove these restrictions. */
if (memchr(pattern, '\n', size))
error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));