summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2017-09-25 16:09:20 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2017-09-25 16:09:20 -0400
commit05297416f362b50985b3cd3473778fbb0842295d (patch)
tree61b3d96993b49cd0cf291c3251226866c0ce4b8c
parentacae13faabc505146817f4834a8c9e9b43788312 (diff)
downloadpostgresql-05297416f362b50985b3cd3473778fbb0842295d.tar.gz
Avoid SIGBUS on Linux when a DSM memory request overruns tmpfs.
On Linux, shared memory segments created with shm_open() are backed by swap files created in tmpfs. If the swap file needs to be extended, but there's no tmpfs space left, you get a very unfriendly SIGBUS trap. To avoid this, force allocation of the full request size when we create the segment. This adds a few cycles, but none that we wouldn't expend later anyway, assuming the request isn't hugely bigger than the actual need. Make this code #ifdef __linux__, because (a) there's not currently a reason to think the same problem exists on other platforms, and (b) applying posix_fallocate() to an FD created by shm_open() isn't very portable anyway. Back-patch to 9.4 where the DSM code came in. Thomas Munro, per a bug report from Amul Sul. Discussion: https://postgr.es/m/1002664500.12301802.1471008223422.JavaMail.yahoo@mail.yahoo.com
-rwxr-xr-xconfigure2
-rw-r--r--configure.in2
-rw-r--r--src/backend/storage/ipc/dsm_impl.c54
-rw-r--r--src/include/pg_config.h.in3
-rw-r--r--src/include/pg_config.h.win323
5 files changed, 60 insertions, 4 deletions
diff --git a/configure b/configure
index eec527c847..ad127abac2 100755
--- a/configure
+++ b/configure
@@ -11662,7 +11662,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.in b/configure.in
index f87fc3639b..70983762d0 100644
--- a/configure.in
+++ b/configure.in
@@ -1337,7 +1337,7 @@ PGAC_FUNC_WCSTOMBS_L
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c
index c611ab0baf..0418427880 100644
--- a/src/backend/storage/ipc/dsm_impl.c
+++ b/src/backend/storage/ipc/dsm_impl.c
@@ -73,6 +73,7 @@
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
void **impl_private, void **mapped_address,
Size *mapped_size, int elevel);
+static int dsm_impl_posix_resize(int fd, off_t size);
#endif
#ifdef USE_DSM_SYSV
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
@@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
}
request_size = st.st_size;
}
- else if (*mapped_size != request_size && ftruncate(fd, request_size))
+ else if (*mapped_size != request_size &&
+ dsm_impl_posix_resize(fd, request_size) != 0)
{
int save_errno;
@@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
return true;
}
-#endif
+
+/*
+ * Set the size of a virtual memory region associated with a file descriptor.
+ * If necessary, also ensure that virtual memory is actually allocated by the
+ * operating system, to avoid nasty surprises later.
+ *
+ * fd is the descriptor to resize; size is the new length in bytes.
+ *
+ * Returns zero on success.  Returns non-zero if either truncation or
+ * allocation fails, with errno set to describe the failure.
+ */
+static int
+dsm_impl_posix_resize(int fd, off_t size)
+{
+	int			rc;
+
+	/* Truncate (or extend) the file to the requested size. */
+	rc = ftruncate(fd, size);
+
+	/*
+	 * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
+	 * ftruncate, the file may contain a hole.  Accessing memory backed by a
+	 * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
+	 * is no more tmpfs space available.  So we ask tmpfs to allocate pages
+	 * here, so we can fail gracefully with ENOSPC now rather than risking
+	 * SIGBUS later.
+	 */
+#if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
+	if (rc == 0)
+	{
+		/*
+		 * Unlike most system calls, posix_fallocate() reports failure by
+		 * returning the error number directly; it neither returns -1 nor
+		 * sets errno.  So test the return value itself.  We may get
+		 * interrupted, if so just retry.
+		 */
+		do
+		{
+			rc = posix_fallocate(fd, 0, size);
+		} while (rc == EINTR);
+
+		if (rc == ENOSYS)
+		{
+			/*
+			 * Kernel too old (< 2.6.23).  Rather than fail, just trust that
+			 * we won't hit the problem (it typically doesn't show up without
+			 * many-GB-sized requests, anyway).
+			 */
+			rc = 0;
+		}
+		else if (rc != 0)
+		{
+			/*
+			 * Our contract is to leave the failure reason in errno, so
+			 * copy the returned error number there; we still return it
+			 * as the non-zero failure indicator.
+			 */
+			errno = rc;
+		}
+	}
+#endif							/* HAVE_POSIX_FALLOCATE && __linux__ */
+
+	return rc;
+}
+
+#endif /* USE_DSM_POSIX */
#ifdef USE_DSM_SYSV
/*
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c7be116e12..95d30d16b6 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -390,6 +390,9 @@
/* Define to 1 if you have the `posix_fadvise' function. */
#undef HAVE_POSIX_FADVISE
+/* Define to 1 if you have the `posix_fallocate' function. */
+#undef HAVE_POSIX_FALLOCATE
+
/* Define to 1 if you have the POSIX signal interface. */
#undef HAVE_POSIX_SIGNALS
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 688192dd03..508608ed36 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -258,6 +258,9 @@
/* Define to 1 if you have the <poll.h> header file. */
/* #undef HAVE_POLL_H */
+/* Define to 1 if you have the `posix_fallocate' function. */
+/* #undef HAVE_POSIX_FALLOCATE */
+
/* Define to 1 if you have the POSIX signal interface. */
/* #undef HAVE_POSIX_SIGNALS */