summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2014-09-15 16:18:00 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2014-09-17 20:37:48 -0700
commit: 000737024019deb7b59a14f336ded508bd271352 (patch)
tree: 5fa4c8a9f18258abe7c1dfebadd57e177d48f67e
parent: 543fb3e3bc68eb689c91f5f68c9e25d3f703260b (diff)
download: grep-000737024019deb7b59a14f336ded508bd271352.tar.gz
grep: -z no longer considers '\200' to be binary data
This avoids a problem when using grep -z in a Windows-1252 locale.
Plus, it lets 'grep -z' run a bit faster.
* NEWS: Document this.
* src/grep.c (buffer_textbin): Don't look for '\200' if -z.
* tests/pcre-z: Test for new behavior.
-rw-r--r--  NEWS          2
-rw-r--r--  src/grep.c    12
-rwxr-xr-x  tests/pcre-z  4
3 files changed, 9 insertions, 9 deletions
diff --git a/NEWS b/NEWS
index 9377d7d6..51b63fb4 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,8 @@ GNU grep NEWS -*- outline -*-
In locales with multibyte character encodings other than UTF-8,
grep -P now reports an error and exits instead of misbehaving.
+ grep -z no longer automatically treats the byte '\200' as binary data.
+
* Noteworthy changes in release 2.20 (2014-06-03) [stable]
** Bug fixes
diff --git a/src/grep.c b/src/grep.c
index 1aa64db2..1c6fee87 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -462,14 +462,10 @@ textbin_is_binary (enum textbin textbin)
static enum textbin
buffer_textbin (char const *buf, size_t size)
{
- char badbyte = eolbyte ? '\0' : '\200';
+ if (eolbyte && memchr (buf, '\0', size))
+ return TEXTBIN_BINARY;
- if (MB_CUR_MAX <= 1)
- {
- if (memchr (buf, badbyte, size))
- return TEXTBIN_BINARY;
- }
- else
+ if (1 < MB_CUR_MAX)
{
mbstate_t mbs = { 0 };
size_t clen;
@@ -477,8 +473,6 @@ buffer_textbin (char const *buf, size_t size)
for (p = buf; p < buf + size; p += clen)
{
- if (*p == badbyte)
- return TEXTBIN_BINARY;
clen = mb_clen (p, buf + size - p, &mbs);
if ((size_t) -2 <= clen)
return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
diff --git a/tests/pcre-z b/tests/pcre-z
index 99ebc43c..6bbde947 100755
--- a/tests/pcre-z
+++ b/tests/pcre-z
@@ -20,4 +20,8 @@ grep -Pz "$REGEX" in > out 2>err || fail=1
compare exp out || fail=1
compare /dev/null err || fail=1
+printf '\200\0' >in0
+LC_ALL=C grep -z . in0 >out || fail=1
+compare in0 out || fail=1
+
Exit $fail