summaryrefslogtreecommitdiff
path: root/src/backend/port
diff options
context:
space:
mode:
authorNoah Misch <noah@leadboat.com>2019-04-12 22:36:38 -0700
committerNoah Misch <noah@leadboat.com>2019-04-12 22:36:38 -0700
commitc098509927f9a49ebceb301a2cb6a477ecd4ac3c (patch)
treec53f974f64d6915c4cb1172924ec94a7349fa779 /src/backend/port
parentdb8db624e826efbe16aab1ae921bae071f98f099 (diff)
downloadpostgresql-c098509927f9a49ebceb301a2cb6a477ecd4ac3c.tar.gz
Consistently test for in-use shared memory.
postmaster startup scrutinizes any shared memory segment recorded in postmaster.pid, exiting if that segment matches the current data directory and has an attached process. When the postmaster.pid file was missing, a starting postmaster used weaker checks. Change to use the same checks in both scenarios. This increases the chance of a startup failure, in lieu of data corruption, if the DBA does "kill -9 `head -n1 postmaster.pid` && rm postmaster.pid && pg_ctl -w start". A postmaster will no longer stop if shmat() of an old segment fails with EACCES. A postmaster will no longer recycle segments pertaining to other data directories. That's good for production, but it's bad for integration tests that crash a postmaster and immediately delete its data directory. Such a test now leaks a segment indefinitely. No "make check-world" test does that. win32_shmem.c already avoided all these problems. In 9.6 and later, enhance PostgresNode to facilitate testing. Back-patch to 9.4 (all supported versions). Reviewed (in earlier versions) by Daniel Gustafsson and Kyotaro HORIGUCHI. Discussion: https://postgr.es/m/20190408064141.GA2016666@rfd.leadboat.com
Diffstat (limited to 'src/backend/port')
-rw-r--r--src/backend/port/sysv_shmem.c269
-rw-r--r--src/backend/port/win32_shmem.c7
2 files changed, 157 insertions, 119 deletions
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 1511a61bc5..a9d7bf9a14 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -72,6 +72,26 @@
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
+/*
+ * How does a given IpcMemoryId relate to this PostgreSQL process?
+ *
+ * One could recycle unattached segments of different data directories if we
+ * distinguished that case from other SHMSTATE_FOREIGN cases. Doing so would
+ * cause us to visit less of the key space, making us less likely to detect a
+ * SHMSTATE_ATTACHED key. It would also complicate the concurrency analysis,
+ * in that postmasters of different data directories could simultaneously
+ * attempt to recycle a given key. We'll waste keys longer in some cases, but
+ * avoiding the problems of the alternative justifies that loss.
+ */
+typedef enum
+{
+ SHMSTATE_ANALYSIS_FAILURE, /* unexpected failure to analyze the ID */
+ SHMSTATE_ATTACHED, /* pertinent to DataDir, has attached PIDs */
+ SHMSTATE_ENOENT, /* no segment of that ID */
+ SHMSTATE_FOREIGN, /* exists, but not pertinent to DataDir */
+ SHMSTATE_UNATTACHED /* pertinent to DataDir, no attached PIDs */
+} IpcMemoryState;
+
unsigned long UsedShmemSegID = 0;
void *UsedShmemSegAddr = NULL;
@@ -82,8 +102,8 @@ static void *AnonymousShmem = NULL;
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
static void IpcMemoryDetach(int status, Datum shmaddr);
static void IpcMemoryDelete(int status, Datum shmId);
-static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
- IpcMemoryId *shmid);
+static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId,
+ PGShmemHeader **addr);
/*
@@ -287,11 +307,36 @@ IpcMemoryDelete(int status, Datum shmId)
bool
PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
{
- IpcMemoryId shmId = (IpcMemoryId) id2;
+ PGShmemHeader *memAddress;
+ IpcMemoryState state;
+
+ state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress);
+ if (memAddress && shmdt(memAddress) < 0)
+ elog(LOG, "shmdt(%p) failed: %m", memAddress);
+ switch (state)
+ {
+ case SHMSTATE_ENOENT:
+ case SHMSTATE_FOREIGN:
+ case SHMSTATE_UNATTACHED:
+ return false;
+ case SHMSTATE_ANALYSIS_FAILURE:
+ case SHMSTATE_ATTACHED:
+ return true;
+ }
+ return true;
+}
+
+/* See comment at IpcMemoryState. */
+static IpcMemoryState
+PGSharedMemoryAttach(IpcMemoryId shmId,
+ PGShmemHeader **addr)
+{
struct shmid_ds shmStat;
struct stat statbuf;
PGShmemHeader *hdr;
+ *addr = NULL;
+
/*
* We detect whether a shared memory segment is in use by seeing whether
* it (a) exists and (b) has any processes attached to it.
@@ -304,15 +349,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
* exists.
*/
if (errno == EINVAL)
- return false;
+ return SHMSTATE_ENOENT;
/*
- * EACCES implies that the segment belongs to some other userid, which
- * means it is not a Postgres shmem segment (or at least, not one that
- * is relevant to our data directory).
+ * EACCES implies we have no read permission, which means it is not a
+ * Postgres shmem segment (or at least, not one that is relevant to
+ * our data directory).
*/
if (errno == EACCES)
- return false;
+ return SHMSTATE_FOREIGN;
/*
* Some Linux kernel versions (in fact, all of them as of July 2007)
@@ -323,7 +368,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
*/
#ifdef HAVE_LINUX_EIDRM_BUG
if (errno == EIDRM)
- return false;
+ return SHMSTATE_ENOENT;
#endif
/*
@@ -331,25 +376,32 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
* only likely case is EIDRM, which implies that the segment has been
* IPC_RMID'd but there are still processes attached to it.
*/
- return true;
+ return SHMSTATE_ANALYSIS_FAILURE;
}
- /* If it has no attached processes, it's not in use */
- if (shmStat.shm_nattch == 0)
- return false;
-
/*
* Try to attach to the segment and see if it matches our data directory.
* This avoids shmid-conflict problems on machines that are running
* several postmasters under the same userid.
*/
if (stat(DataDir, &statbuf) < 0)
- return true; /* if can't stat, be conservative */
-
- hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
+ return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */
+ /*
+ * Attachment fails if we have no write permission. Since that will never
+ * happen with Postgres IPCProtection, such a failure shows the segment is
+ * not a Postgres segment. If attachment fails for some other reason, be
+ * conservative.
+ */
+ hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS);
if (hdr == (PGShmemHeader *) -1)
- return true; /* if can't attach, be conservative */
+ {
+ if (errno == EACCES)
+ return SHMSTATE_FOREIGN;
+ else
+ return SHMSTATE_ANALYSIS_FAILURE;
+ }
+ *addr = hdr;
if (hdr->magic != PGShmemMagic ||
hdr->device != statbuf.st_dev ||
@@ -357,16 +409,12 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
{
/*
* It's either not a Postgres segment, or not one for my data
- * directory. In either case it poses no threat.
+ * directory.
*/
- shmdt((void *) hdr);
- return false;
+ return SHMSTATE_FOREIGN;
}
- /* Trouble --- looks a lot like there's still live backends */
- shmdt((void *) hdr);
-
- return true;
+ return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
}
#ifdef MAP_HUGETLB
@@ -538,25 +586,21 @@ AnonymousShmemDetach(int status, Datum arg)
* standard header. Also, register an on_shmem_exit callback to release
* the storage.
*
- * Dead Postgres segments are recycled if found, but we do not fail upon
- * collision with non-Postgres shmem segments. The idea here is to detect and
- * re-use keys that may have been assigned by a crashed postmaster or backend.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment.
+ * Dead Postgres segments pertinent to this DataDir are recycled if found, but
+ * we do not fail upon collision with foreign shmem segments. The idea here
+ * is to detect and re-use keys that may have been assigned by a crashed
+ * postmaster or backend.
*
* The port number is passed for possible use as a key (for SysV, we use
- * it to generate the starting shmem key). In a standalone backend,
- * zero will be passed.
+ * it to generate the starting shmem key).
*/
PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
PGShmemHeader **shim)
{
IpcMemoryKey NextShmemSegID;
void *memAddress;
PGShmemHeader *hdr;
- IpcMemoryId shmid;
struct stat statbuf;
Size sysvsize;
@@ -588,11 +632,20 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
/* Make sure PGSharedMemoryAttach doesn't fail without need */
UsedShmemSegAddr = NULL;
- /* Loop till we find a free IPC key */
- NextShmemSegID = port * 1000;
+ /*
+ * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to
+ * ensure no more than one postmaster per data directory can enter this
+ * loop simultaneously. (CreateDataDirLockFile() does not ensure that,
+ * but prefer fixing it over coping here.)
+ */
+ NextShmemSegID = 1 + port * 1000;
- for (NextShmemSegID++;; NextShmemSegID++)
+ for (;;)
{
+ IpcMemoryId shmid;
+ PGShmemHeader *oldhdr;
+ IpcMemoryState state;
+
/* Try to create new segment */
memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
if (memAddress)
@@ -600,58 +653,71 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
/* Check shared memory and possibly remove and recreate */
- if (makePrivate) /* a standalone backend shouldn't do this */
- continue;
-
- if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
- continue; /* can't attach, not one of mine */
-
/*
- * If I am not the creator and it belongs to an extant process,
- * continue.
+ * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
+ * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
+ * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
*/
- hdr = (PGShmemHeader *) memAddress;
- if (hdr->creatorPID != getpid())
+ shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
+ if (shmid < 0)
{
- if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
- {
- shmdt(memAddress);
- continue; /* segment belongs to a live process */
- }
+ oldhdr = NULL;
+ state = SHMSTATE_FOREIGN;
}
+ else
+ state = PGSharedMemoryAttach(shmid, &oldhdr);
- /*
- * The segment appears to be from a dead Postgres process, or from a
- * previous cycle of life in this same process. Zap it, if possible,
- * and any associated dynamic shared memory segments, as well. This
- * probably shouldn't fail, but if it does, assume the segment belongs
- * to someone else after all, and continue quietly.
- */
- if (hdr->dsm_control != 0)
- dsm_cleanup_using_control_segment(hdr->dsm_control);
- shmdt(memAddress);
- if (shmctl(shmid, IPC_RMID, NULL) < 0)
- continue;
+ switch (state)
+ {
+ case SHMSTATE_ANALYSIS_FAILURE:
+ case SHMSTATE_ATTACHED:
+ ereport(FATAL,
+ (errcode(ERRCODE_LOCK_FILE_EXISTS),
+ errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
+ (unsigned long) NextShmemSegID,
+ (unsigned long) shmid),
+ errhint("Terminate any old server processes associated with data directory \"%s\".",
+ DataDir)));
+ break;
+ case SHMSTATE_ENOENT:
- /*
- * Now try again to create the segment.
- */
- memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
- if (memAddress)
- break; /* successful create and attach */
+ /*
+ * To our surprise, some other process deleted since our last
+ * InternalIpcMemoryCreate(). Moments earlier, we would have
+ * seen SHMSTATE_FOREIGN. Try that same ID again.
+ */
+ elog(LOG,
+ "shared memory block (key %lu, ID %lu) deleted during startup",
+ (unsigned long) NextShmemSegID,
+ (unsigned long) shmid);
+ break;
+ case SHMSTATE_FOREIGN:
+ NextShmemSegID++;
+ break;
+ case SHMSTATE_UNATTACHED:
- /*
- * Can only get here if some other process managed to create the same
- * shmem key before we did. Let him have that one, loop around to try
- * next key.
- */
+ /*
+ * The segment pertains to DataDir, and every process that had
+ * used it has died or detached. Zap it, if possible, and any
+ * associated dynamic shared memory segments, as well. This
+ * shouldn't fail, but if it does, assume the segment belongs
+ * to someone else after all, and try the next candidate.
+ * Otherwise, try again to create the segment. That may fail
+ * if some other process creates the same shmem key before we
+ * do, in which case we'll try the next key.
+ */
+ if (oldhdr->dsm_control != 0)
+ dsm_cleanup_using_control_segment(oldhdr->dsm_control);
+ if (shmctl(shmid, IPC_RMID, NULL) < 0)
+ NextShmemSegID++;
+ break;
+ }
+
+ if (oldhdr && shmdt(oldhdr) < 0)
+ elog(LOG, "shmdt(%p) failed: %m", oldhdr);
}
- /*
- * OK, we created a new segment. Mark it as created by this process. The
- * order of assignments here is critical so that another Postgres process
- * can't see the header as valid but belonging to an invalid PID!
- */
+ /* Initialize new segment. */
hdr = (PGShmemHeader *) memAddress;
hdr->creatorPID = getpid();
hdr->magic = PGShmemMagic;
@@ -707,7 +773,8 @@ void
PGSharedMemoryReAttach(void)
{
IpcMemoryId shmid;
- void *hdr;
+ PGShmemHeader *hdr;
+ IpcMemoryState state;
void *origUsedShmemSegAddr = UsedShmemSegAddr;
Assert(UsedShmemSegAddr != NULL);
@@ -720,14 +787,18 @@ PGSharedMemoryReAttach(void)
#endif
elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
- hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid);
- if (hdr == NULL)
+ shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
+ if (shmid < 0)
+ state = SHMSTATE_FOREIGN;
+ else
+ state = PGSharedMemoryAttach(shmid, &hdr);
+ if (state != SHMSTATE_ATTACHED)
elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
(int) UsedShmemSegID, UsedShmemSegAddr);
if (hdr != origUsedShmemSegAddr)
elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
hdr, origUsedShmemSegAddr);
- dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
+ dsm_set_control_handle(hdr->dsm_control);
UsedShmemSegAddr = hdr; /* probably redundant */
}
@@ -801,31 +872,3 @@ PGSharedMemoryDetach(void)
AnonymousShmem = NULL;
}
}
-
-
-/*
- * Attach to shared memory and make sure it has a Postgres header
- *
- * Returns attach address if OK, else NULL
- */
-static PGShmemHeader *
-PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
-{
- PGShmemHeader *hdr;
-
- if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0)
- return NULL;
-
- hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS);
-
- if (hdr == (PGShmemHeader *) -1)
- return NULL; /* failed: must be some other app's */
-
- if (hdr->magic != PGShmemMagic)
- {
- shmdt((void *) hdr);
- return NULL; /* segment belongs to a non-Postgres app */
- }
-
- return hdr;
-}
diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c
index eed17c5e21..ccd7b6b5e3 100644
--- a/src/backend/port/win32_shmem.c
+++ b/src/backend/port/win32_shmem.c
@@ -192,14 +192,9 @@ EnableLockPagesPrivilege(int elevel)
*
* Create a shared memory segment of the given size and initialize its
* standard header.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment. On win32, we always create a new segment,
- * since there is no need for recycling (segments go away automatically
- * when the last backend exits)
*/
PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
PGShmemHeader **shim)
{
void *memAddress;