summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeeraj Singh <neerajsi@microsoft.com>2022-03-10 22:43:20 +0000
committerJunio C Hamano <gitster@pobox.com>2022-03-10 15:10:22 -0800
commitabf38abec201cded6094801766d69e11a6c112b6 (patch)
treef1258b40f52f600be015a365ae5e57ed3b977a33
parent19d3f228c8df7e946278c96fb52acf1cea0f6a7a (diff)
downloadgit-abf38abec201cded6094801766d69e11a6c112b6.tar.gz
core.fsyncmethod: add writeout-only mode
This commit introduces the `core.fsyncMethod` configuration knob, which can currently be set to `fsync` or `writeout-only`. The new writeout-only mode attempts to tell the operating system to flush its in-memory page cache to the storage hardware without issuing a CACHE_FLUSH command to the storage controller. Writeout-only fsync is significantly faster than a vanilla fsync on common hardware, since data is written to a disk-side cache rather than all the way to a durable medium. Later changes in this patch series will take advantage of this primitive to implement batching of hardware flushes. When git_fsync is called with FSYNC_WRITEOUT_ONLY, it may fail and the caller is expected to do an ordinary fsync as needed. On Apple platforms, the fsync system call does not issue a CACHE_FLUSH directive to the storage controller. This change updates fsync to do fcntl(F_FULLFSYNC) to make fsync actually durable. We maintain parity with existing behavior on Apple platforms by setting the default value of the new core.fsyncMethod option. Signed-off-by: Neeraj Singh <neerajsi@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--Documentation/config/core.txt9
-rw-r--r--Makefile6
-rw-r--r--cache.h7
-rw-r--r--compat/mingw.h3
-rw-r--r--compat/win32/flush.c28
-rw-r--r--config.c12
-rw-r--r--config.mak.uname3
-rw-r--r--configure.ac8
-rw-r--r--contrib/buildsystems/CMakeLists.txt16
-rw-r--r--environment.c1
-rw-r--r--git-compat-util.h24
-rw-r--r--wrapper.c64
-rw-r--r--write-or-die.c11
13 files changed, 184 insertions, 8 deletions
diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index c04f62a54a..dbb134f713 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -547,6 +547,15 @@ core.whitespace::
is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
errors. The default tab width is 8. Allowed values are 1 to 63.
+core.fsyncMethod::
+ A value indicating the strategy Git will use to harden repository data
+ using fsync and related primitives.
++
+* `fsync` uses the fsync() system call or platform equivalents.
+* `writeout-only` issues pagecache writeback requests, but depending on the
+ filesystem and storage hardware, data added to the repository may not be
+ durable in the event of a system crash. This is the default mode on macOS.
+
core.fsyncObjectFiles::
This boolean will enable 'fsync()' when writing object files.
+
diff --git a/Makefile b/Makefile
index 6f0b4b775f..17fd9b023a 100644
--- a/Makefile
+++ b/Makefile
@@ -411,6 +411,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
+# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
+#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
@@ -1897,6 +1899,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif
+ifdef HAVE_SYNC_FILE_RANGE
+ BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
+endif
+
ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif
diff --git a/cache.h b/cache.h
index 04d4d2db25..82f0194a3d 100644
--- a/cache.h
+++ b/cache.h
@@ -995,6 +995,13 @@ extern char *git_replace_ref_base;
extern int fsync_object_files;
extern int use_fsync;
+
+enum fsync_method {
+ FSYNC_METHOD_FSYNC,
+ FSYNC_METHOD_WRITEOUT_ONLY
+};
+
+extern enum fsync_method fsync_method;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;
diff --git a/compat/mingw.h b/compat/mingw.h
index c9a52ad64a..6074a3d3ce 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -329,6 +329,9 @@ int mingw_getpagesize(void);
#define getpagesize mingw_getpagesize
#endif
+int win32_fsync_no_flush(int fd);
+#define fsync_no_flush win32_fsync_no_flush
+
struct rlimit {
unsigned int rlim_cur;
};
diff --git a/compat/win32/flush.c b/compat/win32/flush.c
new file mode 100644
index 0000000000..291f90ea94
--- /dev/null
+++ b/compat/win32/flush.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include <winternl.h>
+#include "lazyload.h"
+
+int win32_fsync_no_flush(int fd)
+{
+ IO_STATUS_BLOCK io_status;
+
+#define FLUSH_FLAGS_FILE_DATA_ONLY 1
+
+ DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx,
+ HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
+ PIO_STATUS_BLOCK IoStatusBlock);
+
+ if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ memset(&io_status, 0, sizeof(io_status));
+ if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
+ NULL, 0, &io_status)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/config.c b/config.c
index 383b1a4885..f3ff80b01c 100644
--- a/config.c
+++ b/config.c
@@ -1600,6 +1600,18 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
return 0;
}
+ if (!strcmp(var, "core.fsyncmethod")) {
+ if (!value)
+ return config_error_nonbool(var);
+ if (!strcmp(value, "fsync"))
+ fsync_method = FSYNC_METHOD_FSYNC;
+ else if (!strcmp(value, "writeout-only"))
+ fsync_method = FSYNC_METHOD_WRITEOUT_ONLY;
+ else
+ warning(_("ignoring unknown core.fsyncMethod value '%s'"), value);
+
+ }
+
if (!strcmp(var, "core.fsyncobjectfiles")) {
fsync_object_files = git_config_bool(var, value);
return 0;
diff --git a/config.mak.uname b/config.mak.uname
index 4352ea39e9..404fff5dd0 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -57,6 +57,7 @@ ifeq ($(uname_S),Linux)
HAVE_CLOCK_MONOTONIC = YesPlease
# -lrt is needed for clock_gettime on glibc <= 2.16
NEEDS_LIBRT = YesPlease
+ HAVE_SYNC_FILE_RANGE = YesPlease
HAVE_GETDELIM = YesPlease
FREAD_READS_DIRECTORIES = UnfortunatelyYes
BASIC_CFLAGS += -DHAVE_SYSINFO
@@ -463,6 +464,7 @@ endif
CFLAGS =
BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
+ compat/win32/flush.o \
compat/win32/path-utils.o \
compat/win32/pthread.o compat/win32/syslog.o \
compat/win32/trace2_win32_process_info.o \
@@ -640,6 +642,7 @@ ifeq ($(uname_S),MINGW)
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
COMPAT_OBJS += compat/mingw.o compat/winansi.o \
compat/win32/trace2_win32_process_info.o \
+ compat/win32/flush.o \
compat/win32/path-utils.o \
compat/win32/pthread.o compat/win32/syslog.o \
compat/win32/dirent.o
diff --git a/configure.ac b/configure.ac
index 5ee25ec95c..6bd6bef1c4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1082,6 +1082,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
[AC_MSG_RESULT([no])
HAVE_CLOCK_MONOTONIC=])
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
+
+#
+# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
+GIT_CHECK_FUNC(sync_file_range,
+ [HAVE_SYNC_FILE_RANGE=YesPlease],
+ [HAVE_SYNC_FILE_RANGE])
+GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
+
#
# Define NO_SETITIMER if you don't have setitimer.
GIT_CHECK_FUNC(setitimer,
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index e44232f85d..3a9e624166 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -261,10 +261,18 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET HAVE_RTLGENRANDOM)
- list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c
- compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c
- compat/win32/trace2_win32_process_info.c compat/win32/dirent.c
- compat/nedmalloc/nedmalloc.c compat/strdup.c)
+ list(APPEND compat_SOURCES
+ compat/mingw.c
+ compat/winansi.c
+ compat/win32/flush.c
+ compat/win32/path-utils.c
+ compat/win32/pthread.c
+ compat/win32mmap.c
+ compat/win32/syslog.c
+ compat/win32/trace2_win32_process_info.c
+ compat/win32/dirent.c
+ compat/nedmalloc/nedmalloc.c
+ compat/strdup.c)
set(NO_UNIX_SOCKETS 1)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
diff --git a/environment.c b/environment.c
index fd0501e77a..3e3620d759 100644
--- a/environment.c
+++ b/environment.c
@@ -44,6 +44,7 @@ int zlib_compression_level = Z_BEST_SPEED;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
int fsync_object_files;
int use_fsync = -1;
+enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
diff --git a/git-compat-util.h b/git-compat-util.h
index d210cff058..00356476a9 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1271,6 +1271,30 @@ __attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
+#ifdef __APPLE__
+#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_WRITEOUT_ONLY
+#else
+#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_FSYNC
+#endif
+
+enum fsync_action {
+ FSYNC_WRITEOUT_ONLY,
+ FSYNC_HARDWARE_FLUSH
+};
+
+/*
+ * Issues an fsync against the specified file according to the specified mode.
+ *
+ * FSYNC_WRITEOUT_ONLY attempts to use interfaces available on some operating
+ * systems to flush the OS cache without issuing a flush command to the storage
+ * controller. If those interfaces are unavailable, the function fails with
+ * ENOSYS.
+ *
+ * FSYNC_HARDWARE_FLUSH does an OS writeout and hardware flush to ensure that
+ * changes are durable. It is not expected to fail.
+ */
+int git_fsync(int fd, enum fsync_action action);
+
/*
* Preserves errno, prints a message, but gives no warning for ENOENT.
* Returns 0 on success, which includes trying to unlink an object that does
diff --git a/wrapper.c b/wrapper.c
index 1108e4840a..354d784c03 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -546,6 +546,70 @@ int xmkstemp_mode(char *filename_template, int mode)
return fd;
}
+/*
+ * Some platforms return EINTR from fsync. Since fsync is invoked in some
+ * cases by a wrapper that dies on failure, do not expose EINTR to callers.
+ */
+static int fsync_loop(int fd)
+{
+ int err;
+
+ do {
+ err = fsync(fd);
+ } while (err < 0 && errno == EINTR);
+ return err;
+}
+
+int git_fsync(int fd, enum fsync_action action)
+{
+ switch (action) {
+ case FSYNC_WRITEOUT_ONLY:
+
+#ifdef __APPLE__
+ /*
+ * On macOS, fsync just causes filesystem cache writeback but
+ * does not flush hardware caches.
+ */
+ return fsync_loop(fd);
+#endif
+
+#ifdef HAVE_SYNC_FILE_RANGE
+ /*
+ * On linux 2.6.17 and above, sync_file_range is the way to
+ * issue a writeback without a hardware flush. An offset of
+ * 0 and size of 0 indicates writeout of the entire file and the
+ * wait flags ensure that all dirty data is written to the disk
+ * (potentially in a disk-side cache) before we continue.
+ */
+
+ return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
+ SYNC_FILE_RANGE_WRITE |
+ SYNC_FILE_RANGE_WAIT_AFTER);
+#endif
+
+#ifdef fsync_no_flush
+ return fsync_no_flush(fd);
+#endif
+
+ errno = ENOSYS;
+ return -1;
+
+ case FSYNC_HARDWARE_FLUSH:
+ /*
+ * On macOS, a special fcntl is required to really flush the
+ * caches within the storage controller. As of this writing,
+ * this is a very expensive operation on Apple SSDs.
+ */
+#ifdef __APPLE__
+ return fcntl(fd, F_FULLFSYNC);
+#else
+ return fsync_loop(fd);
+#endif
+ default:
+ BUG("unexpected git_fsync(%d) call", action);
+ }
+}
+
static int warn_if_unremovable(const char *op, const char *file, int rc)
{
int err;
diff --git a/write-or-die.c b/write-or-die.c
index a3d5784cec..9faa5f9f56 100644
--- a/write-or-die.c
+++ b/write-or-die.c
@@ -62,10 +62,13 @@ void fsync_or_die(int fd, const char *msg)
use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
if (!use_fsync)
return;
- while (fsync(fd) < 0) {
- if (errno != EINTR)
- die_errno("fsync error on '%s'", msg);
- }
+
+ if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
+ git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
+ return;
+
+ if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
+ die_errno("fsync error on '%s'", msg);
}
void write_or_die(int fd, const void *buf, size_t count)