summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2012-07-27 14:35:26 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2012-07-27 14:36:02 -0700
commit: aa99fbeb9ff85ce8ff024c5ace4e0690726e0ca7 (patch)
tree: 8d5bcba9a9a458ba408ca10152a12dceb0103092
parent: 2f0255e9f4cc5cc8bd619d1f217902eb29b30bc2 (diff)
download: grep-aa99fbeb9ff85ce8ff024c5ace4e0690726e0ca7.tar.gz
grep: don't falsely report compressed text files as binary
* NEWS: Document this.
* src/main.c (file_is_binary): Remove the heuristic based on st_blocks, as it does not work for compressed file systems. On Solaris, it'd be cheap to test whether the file system is known to be uncompressed, which would allow the heuristic, but Solaris has SEEK_HOLE so there's little point.
-rw-r--r--  NEWS        4
-rw-r--r--  src/main.c  39
2 files changed, 9 insertions, 34 deletions
diff --git a/NEWS b/NEWS
index 753aedc2..fdba25e1 100644
--- a/NEWS
+++ b/NEWS
@@ -4,8 +4,8 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
- 'grep' no longer falsely reports tiny text files as being binary
- on file systems that store tiny files' contents in metadata.
+ 'grep' no longer falsely reports text files as being binary on file
+ systems that compress contents or that store tiny contents in metadata.
* Noteworthy changes in release 2.13 (2012-07-04) [stable]
diff --git a/src/main.c b/src/main.c
index 96e4f37f..69547609 100644
--- a/src/main.c
+++ b/src/main.c
@@ -443,9 +443,6 @@ clean_up_stdout (void)
static int
file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
{
- #ifndef HAVE_STRUCT_STAT_ST_BLOCKS
- enum { HAVE_STRUCT_STAT_ST_BLOCKS = 0 };
- #endif
#ifndef SEEK_HOLE
enum { SEEK_HOLE = SEEK_END };
#endif
@@ -461,8 +458,7 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
return 1;
/* If the file has holes, it must contain a null byte somewhere. */
- if ((HAVE_STRUCT_STAT_ST_BLOCKS || SEEK_HOLE != SEEK_END)
- && usable_st_size (st))
+ if (SEEK_HOLE != SEEK_END && usable_st_size (st))
{
off_t cur = bufsize;
if (O_BINARY || fd == STDIN_FILENO)
@@ -472,35 +468,14 @@ file_is_binary (char const *buf, size_t bufsize, int fd, struct stat const *st)
return 0;
}
- /* If the file has fewer blocks than would be needed to
- represent its data, then it must have at least one hole. */
- if (HAVE_STRUCT_STAT_ST_BLOCKS)
- {
- /* Some servers store tiny files using zero blocks, so skip
- this check at apparent EOF, to avoid falsely reporting
- that a tiny zero-block file is binary. */
- off_t not_yet_read = st->st_size - cur;
- if (0 < not_yet_read)
- {
- off_t nonzeros_needed = not_yet_read + bufsize;
- off_t full_blocks = nonzeros_needed / ST_NBLOCKSIZE;
- int partial_block = 0 < nonzeros_needed % ST_NBLOCKSIZE;
- if (ST_NBLOCKS (*st) < full_blocks + partial_block)
- return 1;
- }
- }
-
/* Look for a hole after the current location. */
- if (SEEK_HOLE != SEEK_END)
+ off_t hole_start = lseek (fd, cur, SEEK_HOLE);
+ if (0 <= hole_start)
{
- off_t hole_start = lseek (fd, cur, SEEK_HOLE);
- if (0 <= hole_start)
- {
- if (lseek (fd, cur, SEEK_SET) < 0)
- suppressible_error (filename, errno);
- if (hole_start < st->st_size)
- return 1;
- }
+ if (lseek (fd, cur, SEEK_SET) < 0)
+ suppressible_error (filename, errno);
+ if (hole_start < st->st_size)
+ return 1;
}
}