summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu> 2023-03-04 11:42:16 -0800
committer: Paul Eggert <eggert@cs.ucla.edu> 2023-03-04 14:49:45 -0800
commit: aa266f1b3dc4e12acdc46cc0f562adc03c2c0b8f (patch)
tree: 3c34b524283b144e8e437eccbf5b11690279eb3d
parent: fe64f8be015050500b2be4678a8ce954fde576db (diff)
download: coreutils-aa266f1b3dc4e12acdc46cc0f562adc03c2c0b8f.tar.gz
split: port ‘split -n N /dev/null’ better to macOS
* src/split.c (input_file_size): Do not bother with lseek if the
initial read probe reaches EOF, since the file size is known then.
This works better on macOS, which doesn’t allow lseek on /dev/null.
Do not special-case size-zero files, as the issue can occur with
any size file (though /proc files are the most common).
If the current position is past end of file, treat this as size
zero regardless of whether the file has a usable st_size.
Pass through lseek -1 return values rather than using ‘return -1’;
this makes the code a bit easier to analyze (and a bit faster).
Avoid undefined behavior if the size calculation overflows.
(lines_chunk_split): Do not bother with lseek if it would have no
effect if successful. This works better on macOS, which doesn’t
allow lseek on /dev/null.
* tests/split/l-chunk.sh: Adjust to match fixed behavior.
-rw-r--r--  NEWS                    4
-rw-r--r--  src/split.c            66
-rwxr-xr-x  tests/split/l-chunk.sh  7
3 files changed, 40 insertions, 37 deletions
diff --git a/NEWS b/NEWS
index 31b3e3065..849f174de 100644
--- a/NEWS
+++ b/NEWS
@@ -54,6 +54,10 @@ GNU coreutils NEWS -*- outline -*-
long been documented to be platform-dependent.
[bug introduced 1999-05-02 and only partly fixed in coreutils-8.14]
+ split with -l or -n no longer misbehaves on small piped input, on
+ small GNU/Linux /proc files, or on macOS /dev/null.
+ [bug introduced in coreutils-8.8]
+
stty ispeed and ospeed options no longer accept and silently ignore
invalid speed arguments, or give false warnings for valid speeds.
Now they're validated against both the general accepted set,
diff --git a/src/split.c b/src/split.c
index c66bc69a2..424ca9fe0 100644
--- a/src/split.c
+++ b/src/split.c
@@ -283,14 +283,6 @@ CHUNKS may be:\n\
static off_t
input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
{
- off_t cur = lseek (fd, 0, SEEK_CUR);
- if (cur < 0)
- {
- if (errno == ESPIPE)
- errno = 0; /* Suppress confusing seek error. */
- return -1;
- }
-
off_t size = 0;
do
{
@@ -303,45 +295,49 @@ input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
}
while (size < bufsize);
- /* Note we check st_size _after_ the read() above
- because /proc files on GNU/Linux are seekable
- but have st_size == 0. */
- if (st->st_size == 0)
+ off_t cur = lseek (fd, 0, SEEK_CUR);
+ if (cur < 0)
{
- /* We've filled the buffer, from a seekable file,
- which has an st_size==0, E.g., /dev/zero on GNU/Linux.
- Assume there is no limit to file size. */
- errno = EOVERFLOW;
- return -1;
+ if (errno == ESPIPE)
+ errno = 0; /* Suppress confusing seek error. */
+ return cur;
}
- cur += size;
off_t end;
- if (usable_st_size (st) && cur <= st->st_size)
+ if (usable_st_size (st))
end = st->st_size;
else
{
end = lseek (fd, 0, SEEK_END);
if (end < 0)
- return -1;
- if (end != cur)
+ return end;
+ if (end == OFF_T_MAX)
+ goto overflow; /* E.g., /dev/zero on GNU/Hurd. */
+ if (cur < end)
{
- if (lseek (fd, cur, SEEK_SET) < 0)
- return -1;
- if (end < cur)
- end = cur;
+ off_t cur1 = lseek (fd, cur, SEEK_SET);
+ if (cur1 < 0)
+ return cur1;
}
}
- size += end - cur;
- if (size == OFF_T_MAX)
- {
- /* E.g., /dev/zero on GNU/Hurd. */
- errno = EOVERFLOW;
- return -1;
- }
+ /* Report overflow if we filled the buffer from a file with more
+ bytes than stat or lseek reports. This can happen with mutating
+ (e.g., /proc) files that are larger than the input block size.
+ FIXME: Handle this properly, e.g., by copying the growing file's
+ data into the first output file, and then splitting that output
+ file (which should not grow) into the other output files. */
+ if (end < size)
+ goto overflow;
+
+ if (cur < end && INT_ADD_WRAPV (size, end - cur, &size))
+ goto overflow;
return size;
+
+ overflow:
+ errno = EOVERFLOW;
+ return -1;
}
/* Compute the next sequential output file name and store it into the
@@ -886,7 +882,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
}
else
{
- if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+ if (initial_read < start
+ && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
die (EXIT_FAILURE, errno, "%s", quotef (infile));
initial_read = SIZE_MAX;
}
@@ -1005,7 +1002,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
}
else
{
- if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+ if (initial_read < start
+ && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
die (EXIT_FAILURE, errno, "%s", quotef (infile));
initial_read = SIZE_MAX;
}
diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh
index cdb201746..c94380e87 100755
--- a/tests/split/l-chunk.sh
+++ b/tests/split/l-chunk.sh
@@ -24,9 +24,10 @@ echo "split: invalid number of chunks: '1o'" > exp
returns_ 1 split -n l/1o 2>err || fail=1
compare exp err || fail=1
-echo "split: -: cannot determine file size" > exp
-: | returns_ 1 split -n l/1 2>err || fail=1
-compare exp err || fail=1
+rm -f x* || fail=1
+: | split -n l/1 || fail=1
+compare /dev/null xaa || fail=1
+test ! -f xab || fail=1
# N can be greater than the file size
# in which case no data is extracted, or empty files are written