diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2012-07-27 14:35:26 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2012-07-27 14:36:02 -0700 |
commit | aa99fbeb9ff85ce8ff024c5ace4e0690726e0ca7 (patch) | |
tree | 8d5bcba9a9a458ba408ca10152a12dceb0103092 | |
parent | 2f0255e9f4cc5cc8bd619d1f217902eb29b30bc2 (diff) | |
download | grep-aa99fbeb9ff85ce8ff024c5ace4e0690726e0ca7.tar.gz |
grep: don't falsely report compressed text files as binary
* NEWS: Document this.
* src/main.c (file_is_binary): Remove the heuristic based on
st_blocks, as it does not work for compressed file systems.
On Solaris, it'd be cheap to test whether the file system is known
to be uncompressed, which would allow the heuristic, but Solaris has
SEEK_HOLE so there's little point.
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | src/main.c | 39 |
2 files changed, 9 insertions, 34 deletions
@@ -4,8 +4,8 @@ GNU grep NEWS -*- outline -*- ** Bug fixes - 'grep' no longer falsely reports tiny text files as being binary - on file systems that store tiny files' contents in metadata. + 'grep' no longer falsely reports text files as being binary on file + systems that compress contents or that store tiny contents in metadata. * Noteworthy changes in release 2.13 (2012-07-04) [stable] @@ -443,9 +443,6 @@ clean_up_stdout (void) static int file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) { - #ifndef HAVE_STRUCT_STAT_ST_BLOCKS - enum { HAVE_STRUCT_STAT_ST_BLOCKS = 0 }; - #endif #ifndef SEEK_HOLE enum { SEEK_HOLE = SEEK_END }; #endif @@ -461,8 +458,7 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) return 1; /* If the file has holes, it must contain a null byte somewhere. */ - if ((HAVE_STRUCT_STAT_ST_BLOCKS || SEEK_HOLE != SEEK_END) - && usable_st_size (st)) + if (SEEK_HOLE != SEEK_END && usable_st_size (st)) { off_t cur = bufsize; if (O_BINARY || fd == STDIN_FILENO) @@ -472,35 +468,14 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st) return 0; } - /* If the file has fewer blocks than would be needed to - represent its data, then it must have at least one hole. */ - if (HAVE_STRUCT_STAT_ST_BLOCKS) - { - /* Some servers store tiny files using zero blocks, so skip - this check at apparent EOF, to avoid falsely reporting - that a tiny zero-block file is binary. */ - off_t not_yet_read = st->st_size - cur; - if (0 < not_yet_read) - { - off_t nonzeros_needed = not_yet_read + bufsize; - off_t full_blocks = nonzeros_needed / ST_NBLOCKSIZE; - int partial_block = 0 < nonzeros_needed % ST_NBLOCKSIZE; - if (ST_NBLOCKS (*st) < full_blocks + partial_block) - return 1; - } - } - /* Look for a hole after the current location. 
*/ - if (SEEK_HOLE != SEEK_END) + off_t hole_start = lseek (fd, cur, SEEK_HOLE); + if (0 <= hole_start) { - off_t hole_start = lseek (fd, cur, SEEK_HOLE); - if (0 <= hole_start) - { - if (lseek (fd, cur, SEEK_SET) < 0) - suppressible_error (filename, errno); - if (hole_start < st->st_size) - return 1; - } + if (lseek (fd, cur, SEEK_SET) < 0) + suppressible_error (filename, errno); + if (hole_start < st->st_size) + return 1; } } |