summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com>, 2023-05-08 21:22:48 +0100
committer: Pádraig Brady <P@draigBrady.com>, 2023-05-08 21:34:58 +0100
commit: 059e53e5b49d8a2314045c796d813f51dd862069 (patch)
tree: 93ddb70b92b2d0bdacc6d58cf4df79ec736dbac1
parent: ba128e628cfa0dd111cf235d965200d1cdf77f52 (diff)
download: coreutils-059e53e5b49d8a2314045c796d813f51dd862069.tar.gz
split: advise the kernel of sequential access pattern
As split is often dealing with large files,
ensure we indicate to the kernel our sequential access pattern.
This was seen to operate 5% faster when reading from SSD,
as tested with:

  dd bs=1M count=2K if=/dev/urandom of=big.in

  for split in split.orig split; do
    # Ensure big file is not cached
    dd of=big.in oflag=nocache conv=notrunc,fdatasync count=0 status=none
    # Test read efficiency
    CWD=$PWD; (cd /dev/shm && time $CWD/src/$split -n2 $CWD/big.in)
  done

  real 0m9.039s
  user 0m0.055s
  sys 0m3.510s

  real 0m8.568s
  user 0m0.056s
  sys 0m3.752s

* src/split.c (main): Use fdadvise to help the kernel choose
a more appropriate readahead buffer.
* NEWS: Mention the improvement.
-rw-r--r--  NEWS         5
-rw-r--r--  src/split.c  4
2 files changed, 9 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 9fad8a775..e07b10e31 100644
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,11 @@ GNU coreutils NEWS -*- outline -*-
due to -i, or -u. Instead they only output this information with --debug.
I.e., 'cp -u -v' etc. will have the same verbosity as before coreutils-9.3.
+** Improvements
+
+ split now uses more tuned access patterns for its potentially large input.
+ This was seen to improve throughput by 5% when reading from SSD.
+
* Noteworthy changes in release 9.3 (2023-04-18) [stable]
diff --git a/src/split.c b/src/split.c
index d872ec56a..09209cc5a 100644
--- a/src/split.c
+++ b/src/split.c
@@ -32,6 +32,7 @@
#include "alignalloc.h"
#include "die.h"
#include "error.h"
+#include "fadvise.h"
#include "fd-reopen.h"
#include "fcntl--.h"
#include "full-write.h"
@@ -1621,6 +1622,9 @@ main (int argc, char **argv)
/* Binary I/O is safer when byte counts are used. */
xset_binary_mode (STDIN_FILENO, O_BINARY);
+ /* Advise the kernel of our access pattern. */
+ fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
+
/* Get the optimal block size of input device and make a buffer. */
if (fstat (STDIN_FILENO, &in_stat_buf) != 0)