diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2017-09-25 16:09:20 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2017-09-25 16:09:20 -0400 |
commit | 05297416f362b50985b3cd3473778fbb0842295d (patch) | |
tree | 61b3d96993b49cd0cf291c3251226866c0ce4b8c | |
parent | acae13faabc505146817f4834a8c9e9b43788312 (diff) | |
download | postgresql-05297416f362b50985b3cd3473778fbb0842295d.tar.gz |
Avoid SIGBUS on Linux when a DSM memory request overruns tmpfs.
On Linux, shared memory segments created with shm_open() are backed by
swap files created in tmpfs. If the swap file needs to be extended,
but there's no tmpfs space left, you get a very unfriendly SIGBUS trap.
To avoid this, force allocation of the full request size when we create
the segment. This adds a few cycles, but none that we wouldn't expend
later anyway, assuming the request isn't hugely bigger than the actual
need.
Make this code #ifdef __linux__, because (a) there's not currently a
reason to think the same problem exists on other platforms, and (b)
applying posix_fallocate() to an FD created by shm_open() isn't very
portable anyway.
Back-patch to 9.4 where the DSM code came in.
Thomas Munro, per a bug report from Amul Sul
Discussion: https://postgr.es/m/1002664500.12301802.1471008223422.JavaMail.yahoo@mail.yahoo.com
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | configure.in | 2 | ||||
-rw-r--r-- | src/backend/storage/ipc/dsm_impl.c | 54 | ||||
-rw-r--r-- | src/include/pg_config.h.in | 3 | ||||
-rw-r--r-- | src/include/pg_config.h.win32 | 3 |
5 files changed, 60 insertions, 4 deletions
@@ -11662,7 +11662,7 @@ fi LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l +for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.in b/configure.in index f87fc3639b..70983762d0 100644 --- a/configure.in +++ b/configure.in @@ -1337,7 +1337,7 @@ PGAC_FUNC_WCSTOMBS_L LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) +AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l]) AC_REPLACE_FUNCS(fseeko) case $host_os in diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c index c611ab0baf..0418427880 100644 --- a/src/backend/storage/ipc/dsm_impl.c +++ b/src/backend/storage/ipc/dsm_impl.c @@ -73,6 +73,7 @@ static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, void **impl_private, void **mapped_address, Size *mapped_size, int elevel); +static int dsm_impl_posix_resize(int fd, off_t size); #endif #ifdef USE_DSM_SYSV static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size, @@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, } request_size = st.st_size; } - else if (*mapped_size != request_size && ftruncate(fd, request_size)) + else if (*mapped_size != request_size && + dsm_impl_posix_resize(fd, request_size) != 0) { int save_errno; @@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, return true; } -#endif + +/* + * Set the size of a virtual memory region associated with a file descriptor. + * If necessary, also ensure that virtual memory is actually allocated by the + * operating system, to avoid nasty surprises later. + * + * Returns non-zero if either truncation or allocation fails, and sets errno. + */ +static int +dsm_impl_posix_resize(int fd, off_t size) +{ + int rc; + + /* Truncate (or extend) the file to the requested size. */ + rc = ftruncate(fd, size); + + /* + * On Linux, a shm_open fd is backed by a tmpfs file. After resizing with + * ftruncate, the file may contain a hole. Accessing memory backed by a + * hole causes tmpfs to allocate pages, which fails with SIGBUS if there + * is no more tmpfs space available. So we ask tmpfs to allocate pages + * here, so we can fail gracefully with ENOSPC now rather than risking + * SIGBUS later. + */ +#if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__) + if (rc == 0) + { + /* We may get interrupted, if so just retry. */ + do + { + rc = posix_fallocate(fd, 0, size); + } while (rc == -1 && errno == EINTR); + + if (rc != 0 && errno == ENOSYS) + { + /* + * Kernel too old (< 2.6.23). Rather than fail, just trust that + * we won't hit the problem (it typically doesn't show up without + * many-GB-sized requests, anyway). + */ + rc = 0; + } + } +#endif /* HAVE_POSIX_FALLOCATE && __linux__ */ + + return rc; +} + +#endif /* USE_DSM_POSIX */ #ifdef USE_DSM_SYSV /* diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c7be116e12..95d30d16b6 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -390,6 +390,9 @@ /* Define to 1 if you have the `posix_fadvise' function. */ #undef HAVE_POSIX_FADVISE +/* Define to 1 if you have the `posix_fallocate' function. */ +#undef HAVE_POSIX_FALLOCATE + /* Define to 1 if you have the POSIX signal interface. */ #undef HAVE_POSIX_SIGNALS diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 688192dd03..508608ed36 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -258,6 +258,9 @@ /* Define to 1 if you have the <poll.h> header file. */ /* #undef HAVE_POLL_H */ +/* Define to 1 if you have the `posix_fallocate' function. */ +/* #undef HAVE_POSIX_FALLOCATE */ + /* Define to 1 if you have the POSIX signal interface. */ /* #undef HAVE_POSIX_SIGNALS */ |