summaryrefslogtreecommitdiff
path: root/src/basic/copy.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/basic/copy.c')
-rw-r--r--src/basic/copy.c339
1 files changed, 269 insertions, 70 deletions
diff --git a/src/basic/copy.c b/src/basic/copy.c
index a9e1a87622..e06a503a29 100644
--- a/src/basic/copy.c
+++ b/src/basic/copy.c
@@ -1,22 +1,4 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
-/***
- This file is part of systemd.
-
- Copyright 2014 Lennart Poettering
-
- systemd is free software; you can redistribute it and/or modify it
- under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version.
-
- systemd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with systemd; If not, see <http://www.gnu.org/licenses/>.
-***/
#include <dirent.h>
#include <errno.h>
@@ -42,6 +24,7 @@
#include "io-util.h"
#include "macro.h"
#include "missing.h"
+#include "mount-util.h"
#include "string-util.h"
#include "strv.h"
#include "time-util.h"
@@ -49,56 +32,149 @@
#include "user-util.h"
#include "xattr-util.h"
-#define COPY_BUFFER_SIZE (16*1024u)
+#define COPY_BUFFER_SIZE (16U*1024U)
+
+/* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
+ * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
+ * case of bind mount cycles and suchlike. */
+#define COPY_DEPTH_MAX 2048U
+
+static ssize_t try_copy_file_range(
+ int fd_in, loff_t *off_in,
+ int fd_out, loff_t *off_out,
+ size_t len,
+ unsigned int flags) {
-static ssize_t try_copy_file_range(int fd_in, loff_t *off_in,
- int fd_out, loff_t *off_out,
- size_t len,
- unsigned int flags) {
static int have = -1;
ssize_t r;
- if (have == false)
+ if (have == 0)
return -ENOSYS;
r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
- if (_unlikely_(have < 0))
+ if (have < 0)
have = r >= 0 || errno != ENOSYS;
- if (r >= 0)
- return r;
- else
+ if (r < 0)
return -errno;
+
+ return r;
}
-int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
+enum {
+ FD_IS_NO_PIPE,
+ FD_IS_BLOCKING_PIPE,
+ FD_IS_NONBLOCKING_PIPE,
+};
+
+static int fd_is_nonblock_pipe(int fd) {
+ struct stat st;
+ int flags;
+
+ /* Checks whether the specified file descriptor refers to a pipe, and if so if O_NONBLOCK is set. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st.st_mode))
+ return FD_IS_NO_PIPE;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0)
+ return -errno;
+
+ return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
+}
+
+int copy_bytes_full(
+ int fdf, int fdt,
+ uint64_t max_bytes,
+ CopyFlags copy_flags,
+ void **ret_remains,
+ size_t *ret_remains_size) {
+
bool try_cfr = true, try_sendfile = true, try_splice = true;
- int r;
+ int r, nonblock_pipe = -1;
size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
assert(fdf >= 0);
assert(fdt >= 0);
- /* Try btrfs reflinks first. */
- if ((copy_flags & COPY_REFLINK) &&
- max_bytes == (uint64_t) -1 &&
- lseek(fdf, 0, SEEK_CUR) == 0 &&
- lseek(fdt, 0, SEEK_CUR) == 0) {
+ /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
+ * of 'max_bytes', which may be specified as UINT64_MAX, in which no maximum is applied. Returns negative on
+ * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
+ * reason but we read but didn't yet write some data an ret_remains/ret_remains_size is not NULL, then it will
+ * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
+ * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
+ * these parameters if non-NULL are set to NULL. */
+
+ if (ret_remains)
+ *ret_remains = NULL;
+ if (ret_remains_size)
+ *ret_remains_size = 0;
+
+ /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
+ * source and destination first. */
+ if ((copy_flags & COPY_REFLINK)) {
+ off_t foffset;
+
+ foffset = lseek(fdf, 0, SEEK_CUR);
+ if (foffset >= 0) {
+ off_t toffset;
+
+ toffset = lseek(fdt, 0, SEEK_CUR);
+ if (toffset >= 0) {
+
+ if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
+ r = btrfs_reflink(fdf, fdt); /* full file reflink */
+ else
+ r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
+ if (r >= 0) {
+ off_t t;
+
+ /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
+ if (max_bytes == UINT64_MAX) {
+
+ /* We cloned to the end of the source file, let's position the read
+ * pointer there, and query it at the same time. */
+ t = lseek(fdf, 0, SEEK_END);
+ if (t < 0)
+ return -errno;
+ if (t < foffset)
+ return -ESPIPE;
+
+ /* Let's adjust the destination file write pointer by the same number
+ * of bytes. */
+ t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 0; /* we copied the whole thing, hence hit EOF, return 0 */
+ } else {
+ t = lseek(fdf, foffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ t = lseek(fdt, toffset + max_bytes, SEEK_SET);
+ if (t < 0)
+ return -errno;
+
+ return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
+ }
+ }
- r = btrfs_reflink(fdf, fdt);
- if (r >= 0)
- return 0; /* we copied the whole thing, hence hit EOF, return 0 */
+ log_debug_errno(r, "Reflinking didn't work, falling back to non-reflink copying: %m");
+ }
+ }
}
for (;;) {
ssize_t n;
- if (max_bytes != (uint64_t) -1) {
- if (max_bytes <= 0)
- return 1; /* return > 0 if we hit the max_bytes limit */
+ if (max_bytes <= 0)
+ return 1; /* return > 0 if we hit the max_bytes limit */
- if (m > max_bytes)
- m = max_bytes;
- }
+ if (max_bytes != UINT64_MAX && m > max_bytes)
+ m = max_bytes;
/* First try copy_file_range(), unless we already tried */
if (try_cfr) {
@@ -132,9 +208,51 @@ int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
goto next;
}
- /* Then try splice, unless we already tried */
+ /* Then try splice, unless we already tried. */
+ if (try_splice) {
+
+ /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
+ * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
+ * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
+ * check if either of the specified fds are a pipe, and if so, let's pass the flag
+ * automatically, depending on O_NONBLOCK being set.
+ *
+ * Here's a twist though: when we use it to move data between two pipes of which one has
+ * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
+ * behaviour. Hence in that case we can't use splice() and still guarantee systematic
+ * O_NONBLOCK behaviour, hence don't. */
+
+ if (nonblock_pipe < 0) {
+ int a, b;
+
+ /* Check if either of these fds is a pipe, and if so non-blocking or not */
+ a = fd_is_nonblock_pipe(fdf);
+ if (a < 0)
+ return a;
+
+ b = fd_is_nonblock_pipe(fdt);
+ if (b < 0)
+ return b;
+
+ if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
+ (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
+ (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
+
+ /* splice() only works if one of the fds is a pipe. If neither is, let's skip
+ * this step right-away. As mentioned above, if one of the two fds refers to a
+ * blocking pipe and the other to a non-blocking pipe, we can't use splice()
+ * either, hence don't try either. This hence means we can only use splice() if
+ * either only one of the two fds is a pipe, or if both are pipes with the same
+ * nonblocking flag setting. */
+
+ try_splice = false;
+ else
+ nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
+ }
+ }
+
if (try_splice) {
- n = splice(fdf, NULL, fdt, NULL, m, 0);
+ n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
if (n < 0) {
if (!IN_SET(errno, EINVAL, ENOSYS))
return -errno;
@@ -150,7 +268,8 @@ int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
/* As a fallback just copy bits by hand */
{
- uint8_t buf[MIN(m, COPY_BUFFER_SIZE)];
+ uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
+ ssize_t z;
n = read(fdf, buf, sizeof buf);
if (n < 0)
@@ -158,9 +277,34 @@ int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags copy_flags) {
if (n == 0) /* EOF */
break;
- r = loop_write(fdt, buf, (size_t) n, false);
- if (r < 0)
- return r;
+ z = (size_t) n;
+ do {
+ ssize_t k;
+
+ k = write(fdt, p, z);
+ if (k < 0) {
+ r = -errno;
+
+ if (ret_remains) {
+ void *copy;
+
+ copy = memdup(p, z);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret_remains = copy;
+ }
+
+ if (ret_remains_size)
+ *ret_remains_size = z;
+
+ return r;
+ }
+
+ assert(k <= z);
+ z -= k;
+ p += k;
+ } while (z > 0);
}
next:
@@ -336,6 +480,7 @@ static int fd_copy_directory(
int dt,
const char *to,
dev_t original_device,
+ unsigned depth_left,
uid_t override_uid,
gid_t override_gid,
CopyFlags copy_flags) {
@@ -349,6 +494,9 @@ static int fd_copy_directory(
assert(st);
assert(to);
+ if (depth_left == 0)
+ return -ENAMETOOLONG;
+
if (from)
fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
else
@@ -387,13 +535,40 @@ static int fd_copy_directory(
continue;
}
- if (buf.st_dev != original_device)
- continue;
+ if (S_ISDIR(buf.st_mode)) {
+ /*
+ * Don't descend into directories on other file systems, if this is requested. We do a simple
+ * .st_dev check here, which basically comes for free. Note that we do this check only on
+ * directories, not other kind of file system objects, for two reason:
+ *
+ * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
+ * propagates the .st_dev field of the file system a file originates from all the way up
+ * through the stack to stat(). It doesn't do that for directories however. This means that
+ * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
+ * confusion we hence avoid relying on this check for regular files.
+ *
+ * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
+ * where we really want to avoid descending down in all eternity. However the .st_dev check
+ * is usually not sufficient for this protection anyway, as bind mount cycles from the same
+ * file system onto itself can't be detected that way. (Note we also do a recursion depth
+ * check, which is probably the better protection in this regard, which is why
+ * COPY_SAME_MOUNT is optional).
+ */
+
+ if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
+ if (buf.st_dev != original_device)
+ continue;
+
+ r = fd_is_mount_point(dirfd(d), de->d_name, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+ }
- if (S_ISREG(buf.st_mode))
+ q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags);
+ } else if (S_ISREG(buf.st_mode))
q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
- else if (S_ISDIR(buf.st_mode))
- q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, override_uid, override_gid, copy_flags);
else if (S_ISLNK(buf.st_mode))
q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
else if (S_ISFIFO(buf.st_mode))
@@ -443,7 +618,7 @@ int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t overr
if (S_ISREG(st.st_mode))
return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
else if (S_ISDIR(st.st_mode))
- return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, override_uid, override_gid, copy_flags);
+ return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags);
else if (S_ISLNK(st.st_mode))
return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
else if (S_ISFIFO(st.st_mode))
@@ -470,7 +645,7 @@ int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) {
if (!S_ISDIR(st.st_mode))
return -ENOTDIR;
- return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, UID_INVALID, GID_INVALID, copy_flags);
+ return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags);
}
int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
@@ -485,7 +660,7 @@ int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
if (!S_ISDIR(st.st_mode))
return -ENOTDIR;
- return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, UID_INVALID, GID_INVALID, copy_flags);
+ return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags);
}
int copy_file_fd(const char *from, int fdt, CopyFlags copy_flags) {
@@ -538,31 +713,55 @@ int copy_file(const char *from, const char *to, int flags, mode_t mode, unsigned
}
int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
- _cleanup_free_ char *t = NULL;
+ _cleanup_(unlink_and_freep) char *t = NULL;
+ _cleanup_close_ int fdt = -1;
int r;
assert(from);
assert(to);
- r = tempfn_random(to, NULL, &t);
- if (r < 0)
- return r;
+ /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
+ * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
+ * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
+ * system right-away and unconditionally which we then can renameat() to the right name after we completed
+ * writing it. */
+
+ if (copy_flags & COPY_REPLACE) {
+ r = tempfn_random(to, NULL, &t);
+ if (r < 0)
+ return r;
- r = copy_file(from, t, O_NOFOLLOW|O_EXCL, mode, chattr_flags, copy_flags);
+ fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
+ if (fdt < 0) {
+ t = mfree(t);
+ return -errno;
+ }
+ } else {
+ fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
+ if (fdt < 0)
+ return fdt;
+ }
+
+ if (chattr_flags != 0)
+ (void) chattr_fd(fdt, chattr_flags, (unsigned) -1);
+
+ r = copy_file_fd(from, fdt, copy_flags);
if (r < 0)
return r;
+ if (fchmod(fdt, mode) < 0)
+ return -errno;
+
if (copy_flags & COPY_REPLACE) {
- r = renameat(AT_FDCWD, t, AT_FDCWD, to);
+ if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
+ return -errno;
+ } else {
+ r = link_tmpfile(fdt, t, to);
if (r < 0)
- r = -errno;
- } else
- r = rename_noreplace(AT_FDCWD, t, AT_FDCWD, to);
- if (r < 0) {
- (void) unlink(t);
- return r;
+ return r;
}
+ t = mfree(t);
return 0;
}