summary | refs | log | tree | commit | diff
path: root/src/grep.c
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2014-09-15 16:18:00 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2014-09-17 20:37:48 -0700
commit 000737024019deb7b59a14f336ded508bd271352 (patch)
tree 5fa4c8a9f18258abe7c1dfebadd57e177d48f67e /src/grep.c
parent 543fb3e3bc68eb689c91f5f68c9e25d3f703260b (diff)
download grep-000737024019deb7b59a14f336ded508bd271352.tar.gz
grep: -z no longer considers '\200' to be binary data
This avoids a problem when using grep -z in a Windows-1252 locale.
Plus, it lets 'grep -z' run a bit faster.
* NEWS: Document this.
* src/grep.c (buffer_textbin): Don't look for '\200' if -z.
* tests/pcre-z: Test for new behavior.
Diffstat (limited to 'src/grep.c')
-rw-r--r-- src/grep.c 12
1 files changed, 3 insertions, 9 deletions
diff --git a/src/grep.c b/src/grep.c
index 1aa64db2..1c6fee87 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -462,14 +462,10 @@ textbin_is_binary (enum textbin textbin)
static enum textbin
buffer_textbin (char const *buf, size_t size)
{
- char badbyte = eolbyte ? '\0' : '\200';
+ if (eolbyte && memchr (buf, '\0', size))
+ return TEXTBIN_BINARY;
- if (MB_CUR_MAX <= 1)
- {
- if (memchr (buf, badbyte, size))
- return TEXTBIN_BINARY;
- }
- else
+ if (1 < MB_CUR_MAX)
{
mbstate_t mbs = { 0 };
size_t clen;
@@ -477,8 +473,6 @@ buffer_textbin (char const *buf, size_t size)
for (p = buf; p < buf + size; p += clen)
{
- if (*p == badbyte)
- return TEXTBIN_BINARY;
clen = mb_clen (p, buf + size - p, &mbs);
if ((size_t) -2 <= clen)
return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;