summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFujii Masao <fujii@postgresql.org>2017-01-17 17:32:45 +0900
committerFujii Masao <fujii@postgresql.org>2017-01-17 17:32:45 +0900
commitc73157ca0e058806a956b8126f158dcb513b1881 (patch)
treeec878947b5eab7ab76da7b9fc7272cd0fd731aea
parent5e1e2e75d25b7fd7152fa2b41998efd2dac9c275 (diff)
downloadpostgresql-c73157ca0e058806a956b8126f158dcb513b1881.tar.gz
Fix an assertion failure related to an exclusive backup.
Previously multiple sessions could execute pg_start_backup() and pg_stop_backup() to start and stop an exclusive backup at the same time. This could trigger the assertion failure of "FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)". This happend because, even while pg_start_backup() was starting an exclusive backup, other session could run pg_stop_backup() concurrently and mark the backup as not-in-progress unconditionally. This patch introduces ExclusiveBackupState indicating the state of an exclusive backup. This state is used to ensure that there is only one session running pg_start_backup() or pg_stop_backup() at the same time, to avoid the assertion failure. Back-patch to all supported versions. Author: Michael Paquier Reviewed-By: Kyotaro Horiguchi and me Reported-By: Andreas Seltenreich Discussion: <87mvktojme.fsf@credativ.de>
-rw-r--r--src/backend/access/transam/xlog.c209
1 files changed, 147 insertions, 62 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 312a3e3d5d..299a01fed0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -345,6 +345,29 @@ typedef struct XLogwrtResult
} XLogwrtResult;
/*
+ * State of an exclusive backup, necessary to control concurrent activities
+ * across sessions when working on exclusive backups.
+ *
+ * EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually
+ * running, to be more precise pg_start_backup() is not being executed for
+ * an exclusive backup and there is no exclusive backup in progress.
+ * EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an
+ * exclusive backup.
+ * EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished
+ * running and an exclusive backup is in progress. pg_stop_backup() is
+ * needed to finish it.
+ * EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an
+ * exclusive backup.
+ */
+typedef enum ExclusiveBackupState
+{
+ EXCLUSIVE_BACKUP_NONE = 0,
+ EXCLUSIVE_BACKUP_STARTING,
+ EXCLUSIVE_BACKUP_IN_PROGRESS,
+ EXCLUSIVE_BACKUP_STOPPING
+} ExclusiveBackupState;
+
+/*
* Shared state data for XLogInsert.
*/
typedef struct XLogCtlInsert
@@ -366,13 +389,15 @@ typedef struct XLogCtlInsert
bool fullPageWrites;
/*
- * exclusiveBackup is true if a backup started with pg_start_backup() is
- * in progress, and nonExclusiveBackups is a counter indicating the number
- * of streaming base backups currently in progress. forcePageWrites is set
- * to true when either of these is non-zero. lastBackupStart is the latest
- * checkpoint redo location used as a starting point for an online backup.
+ * exclusiveBackupState indicates the state of an exclusive backup
+ * (see comments of ExclusiveBackupState for more details).
+ * nonExclusiveBackups is a counter indicating the number of streaming
+ * base backups currently in progress. forcePageWrites is set to true
+ * when either of these is non-zero. lastBackupStart is the latest
+ * checkpoint redo location used as a starting point for an online
+ * backup.
*/
- bool exclusiveBackup;
+ ExclusiveBackupState exclusiveBackupState;
int nonExclusiveBackups;
XLogRecPtr lastBackupStart;
} XLogCtlInsert;
@@ -699,6 +724,7 @@ static bool CheckForStandbyTrigger(void);
static void xlog_outrec(StringInfo buf, XLogRecord *record);
#endif
static void pg_start_backup_callback(int code, Datum arg);
+static void pg_stop_backup_callback(int code, Datum arg);
static bool read_backup_label(XLogRecPtr *checkPointLoc,
bool *backupEndRequired, bool *backupFromStandby);
static void rm_redo_error_callback(void *arg);
@@ -9643,7 +9669,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
if (exclusive)
{
- if (XLogCtl->Insert.exclusiveBackup)
+ /*
+ * At first, mark that we're now starting an exclusive backup,
+ * to ensure that there are no other sessions currently running
+ * pg_start_backup() or pg_stop_backup().
+ */
+ if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE)
{
LWLockRelease(WALInsertLock);
ereport(ERROR,
@@ -9651,7 +9682,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
errmsg("a backup is already in progress"),
errhint("Run pg_stop_backup() and try again.")));
}
- XLogCtl->Insert.exclusiveBackup = true;
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING;
}
else
XLogCtl->Insert.nonExclusiveBackups++;
@@ -9810,7 +9841,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
{
/*
* Check for existing backup label --- implies a backup is already
- * running. (XXX given that we checked exclusiveBackup above,
+ * running. (XXX given that we checked exclusiveBackupState above,
* maybe it would be OK to just unlink any such label file?)
*/
if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
@@ -9852,6 +9883,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile)
PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
/*
+ * Mark that start phase has correctly finished for an exclusive backup.
+ */
+ if (exclusive)
+ {
+ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+ LWLockRelease(WALInsertLock);
+ }
+
+ /*
* We're done. As a convenience, return the starting WAL location.
*/
return startpoint;
@@ -9867,8 +9908,8 @@ pg_start_backup_callback(int code, Datum arg)
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
if (exclusive)
{
- Assert(XLogCtl->Insert.exclusiveBackup);
- XLogCtl->Insert.exclusiveBackup = false;
+ Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING);
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
}
else
{
@@ -9876,7 +9917,7 @@ pg_start_backup_callback(int code, Datum arg)
XLogCtl->Insert.nonExclusiveBackups--;
}
- if (!XLogCtl->Insert.exclusiveBackup &&
+ if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0)
{
XLogCtl->Insert.forcePageWrites = false;
@@ -9885,6 +9926,24 @@ pg_start_backup_callback(int code, Datum arg)
}
/*
+ * Error cleanup callback for pg_stop_backup
+ */
+static void
+pg_stop_backup_callback(int code, Datum arg)
+{
+ bool exclusive = DatumGetBool(arg);
+
+ /* Update backup status on failure */
+ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
+ if (exclusive)
+ {
+ Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING);
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+ }
+ LWLockRelease(WALInsertLock);
+}
+
+/*
* do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup()
* function.
@@ -9942,12 +10001,85 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
errmsg("WAL level not sufficient for making an online backup"),
errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start.")));
+ if (exclusive)
+ {
+ /*
+ * At first, mark that we're now stopping an exclusive backup,
+ * to ensure that there are no other sessions currently running
+ * pg_start_backup() or pg_stop_backup().
+ */
+ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
+ if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_IN_PROGRESS)
+ {
+ LWLockRelease(WALInsertLock);
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("exclusive backup not in progress")));
+ }
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING;
+ LWLockRelease(WALInsertLock);
+
+ /*
+ * Remove backup_label. In case of failure, the state for an exclusive
+ * backup is switched back to in-progress.
+ */
+ PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
+ {
+ /*
+ * Read the existing label file into memory.
+ */
+ struct stat statbuf;
+ int r;
+
+ if (stat(BACKUP_LABEL_FILE, &statbuf))
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ BACKUP_LABEL_FILE)));
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("a backup is not in progress")));
+ }
+
+ lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
+ if (!lfp)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ BACKUP_LABEL_FILE)));
+ }
+ labelfile = palloc(statbuf.st_size + 1);
+ r = fread(labelfile, statbuf.st_size, 1, lfp);
+ labelfile[statbuf.st_size] = '\0';
+
+ /*
+ * Close and remove the backup label file
+ */
+ if (r != 1 || ferror(lfp) || FreeFile(lfp))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ BACKUP_LABEL_FILE)));
+ if (unlink(BACKUP_LABEL_FILE) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove file \"%s\": %m",
+ BACKUP_LABEL_FILE)));
+ }
+ PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
+ }
+
/*
* OK to update backup counters and forcePageWrites
*/
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
if (exclusive)
- XLogCtl->Insert.exclusiveBackup = false;
+ {
+ XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
+ }
else
{
/*
@@ -9960,60 +10092,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive)
XLogCtl->Insert.nonExclusiveBackups--;
}
- if (!XLogCtl->Insert.exclusiveBackup &&
+ if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0)
{
XLogCtl->Insert.forcePageWrites = false;
}
LWLockRelease(WALInsertLock);
- if (exclusive)
- {
- /*
- * Read the existing label file into memory.
- */
- struct stat statbuf;
- int r;
-
- if (stat(BACKUP_LABEL_FILE, &statbuf))
- {
- if (errno != ENOENT)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m",
- BACKUP_LABEL_FILE)));
- ereport(ERROR,
- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("a backup is not in progress")));
- }
-
- lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
- if (!lfp)
- {
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read file \"%s\": %m",
- BACKUP_LABEL_FILE)));
- }
- labelfile = palloc(statbuf.st_size + 1);
- r = fread(labelfile, statbuf.st_size, 1, lfp);
- labelfile[statbuf.st_size] = '\0';
-
- /*
- * Close and remove the backup label file
- */
- if (r != 1 || ferror(lfp) || FreeFile(lfp))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read file \"%s\": %m",
- BACKUP_LABEL_FILE)));
- if (unlink(BACKUP_LABEL_FILE) != 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not remove file \"%s\": %m",
- BACKUP_LABEL_FILE)));
- }
-
/*
* Read and parse the START WAL LOCATION line (this code is pretty crude,
* but we are not expecting any variability in the file format).
@@ -10247,7 +10332,7 @@ do_pg_abort_backup(void)
Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
XLogCtl->Insert.nonExclusiveBackups--;
- if (!XLogCtl->Insert.exclusiveBackup &&
+ if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
XLogCtl->Insert.nonExclusiveBackups == 0)
{
XLogCtl->Insert.forcePageWrites = false;