diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-15 16:18:00 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-17 20:37:48 -0700 |
commit | 000737024019deb7b59a14f336ded508bd271352 (patch) | |
tree | 5fa4c8a9f18258abe7c1dfebadd57e177d48f67e /src/grep.c | |
parent | 543fb3e3bc68eb689c91f5f68c9e25d3f703260b (diff) | |
download | grep-000737024019deb7b59a14f336ded508bd271352.tar.gz |
grep: -z no longer considers '\200' to be binary data
This avoids a problem when using grep -z in a Windows-1252 locale.
Plus, it lets 'grep -z' run a bit faster.
* NEWS: Document this.
* src/grep.c (buffer_textbin): Don't look for '\200' if -z.
* tests/pcre-z: Test for new behavior.
Diffstat (limited to 'src/grep.c')
-rw-r--r-- | src/grep.c | 12 |
1 file changed, 3 insertions, 9 deletions
```diff
@@ -462,14 +462,10 @@ textbin_is_binary (enum textbin textbin)
 static enum textbin
 buffer_textbin (char const *buf, size_t size)
 {
-  char badbyte = eolbyte ? '\0' : '\200';
+  if (eolbyte && memchr (buf, '\0', size))
+    return TEXTBIN_BINARY;
 
-  if (MB_CUR_MAX <= 1)
-    {
-      if (memchr (buf, badbyte, size))
-        return TEXTBIN_BINARY;
-    }
-  else
+  if (1 < MB_CUR_MAX)
     {
       mbstate_t mbs = { 0 };
       size_t clen;
@@ -477,8 +473,6 @@ buffer_textbin (char const *buf, size_t size)
 
       for (p = buf; p < buf + size; p += clen)
         {
-          if (*p == badbyte)
-            return TEXTBIN_BINARY;
           clen = mb_clen (p, buf + size - p, &mbs);
           if ((size_t) -2 <= clen)
             return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
```