From 0da948a5d13faa9d39c2170125edf51bdce301aa Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Fri, 17 Mar 2023 13:16:02 +0000 Subject: gfile: Eliminate a stat() call from the file copy code The start of the `g_file_copy()` implementation stats the source file to find all the attributes to copy onto the destination file, so it makes sense to get it to store the source file size at the same time. This saves a subsequent `stat()` call on the source FD in the btrfs reflink or splice code. Every little helps. Signed-off-by: Philip Withnall --- gio/gfile.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/gio/gfile.c b/gio/gfile.c index 94786c84a..4558f2d3b 100644 --- a/gio/gfile.c +++ b/gio/gfile.c @@ -2807,6 +2807,11 @@ g_file_build_attribute_list_for_copy (GFile *file, first = TRUE; s = g_string_new (""); + /* Always query the source file size, even though we can’t set that on the + * destination. This is useful for the copy functions. */ + first = FALSE; + g_string_append (s, G_FILE_ATTRIBUTE_STANDARD_SIZE); + if (attributes) { for (i = 0; i < attributes->n_infos; i++) @@ -3042,6 +3047,7 @@ retry: static gboolean splice_stream_with_progress (GInputStream *in, + GFileInfo *in_info, GOutputStream *out, GCancellable *cancellable, GFileProgressCallback progress_callback, @@ -3085,10 +3091,8 @@ splice_stream_with_progress (GInputStream *in, /* avoid performance impact of querying total size when it's not needed */ if (progress_callback) { - struct stat sbuf; - - if (fstat (fd_in, &sbuf) == 0) - total_size = sbuf.st_size; + g_assert (g_file_info_has_attribute (in_info, G_FILE_ATTRIBUTE_STANDARD_SIZE)); + total_size = g_file_info_get_size (in_info); } if (total_size == -1) @@ -3151,6 +3155,7 @@ splice_stream_with_progress (GInputStream *in, #ifdef __linux__ static gboolean btrfs_reflink_with_progress (GInputStream *in, + GFileInfo *in_info, GOutputStream *out, GFileInfo *info, GCancellable *cancellable, @@ -3169,10 +3174,8 @@ btrfs_reflink_with_progress (GInputStream *in, /* avoid performance impact of querying total size when it's not needed */ if (progress_callback) { - struct stat sbuf; - - if (fstat (fd_in, &sbuf) == 0) - total_size = sbuf.st_size; + g_assert (g_file_info_has_attribute (in_info, G_FILE_ATTRIBUTE_STANDARD_SIZE)); + total_size = g_file_info_get_size (in_info); } if (total_size == -1) @@ -3376,7 +3379,7 @@ file_copy_fallback (GFile *source, { GError *reflink_err = NULL; - if (!btrfs_reflink_with_progress (in, out, info, cancellable, + if (!btrfs_reflink_with_progress (in, info, out, info, cancellable, progress_callback, progress_callback_data, &reflink_err)) { @@ -3403,7 +3406,7 @@ file_copy_fallback (GFile *source, { GError *splice_err = NULL; - if (!splice_stream_with_progress (in, out, cancellable, + if (!splice_stream_with_progress (in, info, out, cancellable, progress_callback, progress_callback_data, &splice_err)) { -- cgit v1.2.1 From aafa19fc0d267ff490cbc95edaaa8dd575cfc8b6 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Thu, 27 Apr 2023 22:42:13 +0100 Subject: tests: Add a test for progress callbacks from g_file_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This wasn’t previously tested. Signed-off-by: Philip Withnall Helps: #2863 --- gio/tests/file.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/gio/tests/file.c b/gio/tests/file.c index 754c6c326..7fbb3ef0c 100644 --- a/gio/tests/file.c +++ b/gio/tests/file.c @@ -2513,6 +2513,80 @@ test_copy_preserve_mode (void) #endif } +typedef struct +{ + goffset current_num_bytes; + goffset total_num_bytes; +} CopyProgressData; + +static void +file_copy_progress_cb (goffset current_num_bytes, + goffset total_num_bytes, + gpointer user_data) +{ + CopyProgressData *prev_data = user_data; + + g_assert_cmpuint (total_num_bytes, ==, prev_data->total_num_bytes); + g_assert_cmpuint (current_num_bytes, >=, prev_data->current_num_bytes); + + /* Update it for the next callback. */ + prev_data->current_num_bytes = current_num_bytes; +} + +static void +test_copy_progress (void) +{ + GFile *src_tmpfile = NULL; + GFile *dest_tmpfile = NULL; + GFileIOStream *iostream; + GOutputStream *ostream; + GError *local_error = NULL; + const guint8 buffer[] = { 1, 2, 3, 4, 5 }; + CopyProgressData progress_data; + + src_tmpfile = g_file_new_tmp ("tmp-copy-progressXXXXXX", + &iostream, &local_error); + g_assert_no_error (local_error); + + /* Write some content to the file for testing. */ + ostream = g_io_stream_get_output_stream (G_IO_STREAM (iostream)); + g_output_stream_write (ostream, buffer, sizeof (buffer), NULL, &local_error); + g_assert_no_error (local_error); + + g_io_stream_close ((GIOStream *) iostream, NULL, &local_error); + g_assert_no_error (local_error); + g_clear_object (&iostream); + + /* Grab a unique destination filename. */ + dest_tmpfile = g_file_new_tmp ("tmp-copy-progressXXXXXX", + &iostream, &local_error); + g_assert_no_error (local_error); + g_io_stream_close ((GIOStream *) iostream, NULL, &local_error); + g_assert_no_error (local_error); + g_clear_object (&iostream); + + /* Set the progress data to an initial offset of zero. The callback will + * assert that progress is non-decreasing and reaches the total length of + * the file. */ + progress_data.current_num_bytes = 0; + progress_data.total_num_bytes = sizeof (buffer); + + /* Copy the file with progress reporting. */ + g_file_copy (src_tmpfile, dest_tmpfile, G_FILE_COPY_OVERWRITE, + NULL, file_copy_progress_cb, &progress_data, &local_error); + g_assert_no_error (local_error); + + g_assert_cmpuint (progress_data.current_num_bytes, ==, progress_data.total_num_bytes); + g_assert_cmpuint (progress_data.total_num_bytes, ==, sizeof (buffer)); + + /* Clean up. */ + (void) g_file_delete (src_tmpfile, NULL, NULL); + (void) g_file_delete (dest_tmpfile, NULL, NULL); + + g_clear_object (&src_tmpfile); + g_clear_object (&dest_tmpfile); +} + static void test_measure (void) { @@ -3844,6 +3918,7 @@ main (int argc, char *argv[]) g_test_add_func ("/file/async-delete", test_async_delete); g_test_add_func ("/file/async-make-symlink", test_async_make_symlink); g_test_add_func ("/file/copy-preserve-mode", test_copy_preserve_mode); + g_test_add_func ("/file/copy/progress", test_copy_progress); g_test_add_func ("/file/measure", test_measure); g_test_add_func ("/file/measure-async", test_measure_async); g_test_add_func ("/file/load-bytes", test_load_bytes); @@ -3869,3 +3944,4 @@ main (int argc, char *argv[]) return g_test_run (); } + -- cgit v1.2.1 From 0e5d9fd249e357934c81a40a9ea4693aee07f5f5 Mon Sep 17 00:00:00 2001 From: Philip Withnall Date: Fri, 17 Mar 2023 13:17:40 +0000 Subject: gfile: Support copy_file_range() for file copies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it can’t be used in all situations, it is a little bit faster than `splice()` in some situations, basically if the file system supports copy on write. In other situations it’s no slower than `splice()`. See `man copy_file_range` for the situations where it doesn’t work. In all of these situations, it will return an error, and the GLib code will fall through and try the existing `splice()` copy code instead. From my testing of `time gio copy A B` with a 9GB file, the `splice()` code path takes 22s, and the `copy_file_range()` code path takes 20s. Signed-off-by: Philip Withnall Fixes: #2863 --- gio/gfile.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ meson.build | 1 + 2 files changed, 141 insertions(+) diff --git a/gio/gfile.c b/gio/gfile.c index 4558f2d3b..31da0b05e 100644 --- a/gio/gfile.c +++ b/gio/gfile.c @@ -3007,6 +3007,122 @@ copy_stream_with_progress (GInputStream *in, return res; } +#ifdef HAVE_COPY_FILE_RANGE +static gboolean +do_copy_file_range (int fd_in, + loff_t *off_in, + int fd_out, + loff_t *off_out, + size_t len, + size_t *bytes_transferred, + GError **error) +{ + ssize_t result; + + do + { + result = copy_file_range (fd_in, off_in, fd_out, off_out, len, 0); + + if (result == -1) + { + int errsv = errno; + + if (errsv == EINTR) + { + continue; + } + else if (errsv == ENOSYS || errsv == EINVAL || errsv == EOPNOTSUPP || errsv == EXDEV) + { + g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + _("Copy file range not supported")); + } + else + { + g_set_error (error, G_IO_ERROR, + g_io_error_from_errno (errsv), + _("Error splicing file: %s"), + g_strerror (errsv)); + } + + return FALSE; + } + } while (result == -1); + + g_assert (result >= 0); + *bytes_transferred = result; + + return TRUE; +} + +static gboolean +copy_file_range_with_progress (GInputStream *in, + GFileInfo *in_info, + GOutputStream *out, + GCancellable *cancellable, + GFileProgressCallback progress_callback, + gpointer progress_callback_data, + GError **error) +{ + goffset total_size, last_notified_size; + size_t copy_len; + loff_t offset_in; + loff_t offset_out; + int fd_in, fd_out; + + fd_in = g_file_descriptor_based_get_fd (G_FILE_DESCRIPTOR_BASED (in)); + fd_out = g_file_descriptor_based_get_fd (G_FILE_DESCRIPTOR_BASED (out)); + + g_assert (g_file_info_has_attribute (in_info, G_FILE_ATTRIBUTE_STANDARD_SIZE)); + total_size = g_file_info_get_size (in_info); + + /* Bail out if the reported size of the file is zero. It might be zero, but it + * might also just be a kernel file in /proc. They report their file size as + * zero, but then have data when you start reading. Go to the fallback code + * path for those. */ + if (total_size == 0) + { + g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED, + _("Copy file range not supported")); + return FALSE; + } + + offset_in = offset_out = 0; + copy_len = total_size; + last_notified_size = 0; + + /* Call copy_file_range() in a loop until the whole contents are copied. For + * smaller files, this loop will iterate only once. For larger files, the + * kernel (at least, kernel 6.1.6) will return after 2GB anyway, so that gives + * us more loop iterations and more progress reporting. */ + while (copy_len > 0) + { + size_t n_copied; + + if (g_cancellable_set_error_if_cancelled (cancellable, error) || + !do_copy_file_range (fd_in, &offset_in, fd_out, &offset_out, copy_len, &n_copied, error)) + return FALSE; + + if (n_copied == 0) + break; + + g_assert (n_copied <= copy_len); + copy_len -= n_copied; + + if (progress_callback) + { + progress_callback (offset_in, total_size, progress_callback_data); + last_notified_size = total_size; + } + } + + /* Make sure we send full copied size */ + if (progress_callback && last_notified_size != total_size) + progress_callback (offset_in, total_size, progress_callback_data); + + return TRUE; +} +#endif /* HAVE_COPY_FILE_RANGE */ + #ifdef HAVE_SPLICE static gboolean @@ -3401,6 +3517,30 @@ file_copy_fallback (GFile *source, } #endif +#ifdef HAVE_COPY_FILE_RANGE + if (G_IS_FILE_DESCRIPTOR_BASED (in) && G_IS_FILE_DESCRIPTOR_BASED (out)) + { + GError *copy_file_range_error = NULL; + + if (copy_file_range_with_progress (in, info, out, cancellable, + progress_callback, progress_callback_data, + ©_file_range_error)) + { + ret = TRUE; + goto out; + } + else if (!g_error_matches (copy_file_range_error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED)) + { + g_propagate_error (error, g_steal_pointer (©_file_range_error)); + goto out; + } + else + { + g_clear_error (©_file_range_error); + } + } +#endif /* HAVE_COPY_FILE_RANGE */ + #ifdef HAVE_SPLICE if (G_IS_FILE_DESCRIPTOR_BASED (in) && G_IS_FILE_DESCRIPTOR_BASED (out)) { diff --git a/meson.build b/meson.build index f275a3475..a5aade045 100644 --- a/meson.build +++ b/meson.build @@ -611,6 +611,7 @@ endif functions = [ 'accept4', 'close_range', + 'copy_file_range', 'endmntent', 'endservent', 'epoll_create', -- cgit v1.2.1