diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-15 16:18:00 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2014-09-17 20:37:48 -0700 |
commit | 000737024019deb7b59a14f336ded508bd271352 (patch) | |
tree | 5fa4c8a9f18258abe7c1dfebadd57e177d48f67e | |
parent | 543fb3e3bc68eb689c91f5f68c9e25d3f703260b (diff) | |
download | grep-000737024019deb7b59a14f336ded508bd271352.tar.gz |
grep: -z no longer considers '\200' to be binary data
This avoids a problem when using grep -z in a Windows-1252 locale.
Plus, it lets 'grep -z' run a bit faster.
* NEWS: Document this.
* src/grep.c (buffer_textbin): Don't look for '\200' if -z.
* tests/pcre-z: Test for new behavior.
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | src/grep.c | 12 | ||||
-rwxr-xr-x | tests/pcre-z | 4 |
3 files changed, 9 insertions, 9 deletions
@@ -26,6 +26,8 @@ GNU grep NEWS -*- outline -*-
   In locales with multibyte character encodings other than UTF-8,
   grep -P now reports an error and exits instead of misbehaving.
 
+  grep -z no longer automatically treats the byte '\200' as binary data.
+
 * Noteworthy changes in release 2.20 (2014-06-03) [stable]
 
 ** Bug fixes
@@ -462,14 +462,10 @@ textbin_is_binary (enum textbin textbin)
 static enum textbin
 buffer_textbin (char const *buf, size_t size)
 {
-  char badbyte = eolbyte ? '\0' : '\200';
+  if (eolbyte && memchr (buf, '\0', size))
+    return TEXTBIN_BINARY;
 
-  if (MB_CUR_MAX <= 1)
-    {
-      if (memchr (buf, badbyte, size))
-        return TEXTBIN_BINARY;
-    }
-  else
+  if (1 < MB_CUR_MAX)
     {
       mbstate_t mbs = { 0 };
       size_t clen;
@@ -477,8 +473,6 @@ buffer_textbin (char const *buf, size_t size)
 
       for (p = buf; p < buf + size; p += clen)
         {
-          if (*p == badbyte)
-            return TEXTBIN_BINARY;
           clen = mb_clen (p, buf + size - p, &mbs);
           if ((size_t) -2 <= clen)
             return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
diff --git a/tests/pcre-z b/tests/pcre-z
index 99ebc43c..6bbde947 100755
--- a/tests/pcre-z
+++ b/tests/pcre-z
@@ -20,4 +20,8 @@ grep -Pz "$REGEX" in > out 2>err || fail=1
 compare exp out || fail=1
 compare /dev/null err || fail=1
 
+printf '\200\0' >in0
+LC_ALL=C grep -z . in0 >out || fail=1
+compare in0 out || fail=1
+
 Exit $fail