summaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorSimon Riggs <simon@2ndQuadrant.com>2011-03-06 22:49:16 +0000
committerSimon Riggs <simon@2ndQuadrant.com>2011-03-06 22:49:16 +0000
commita8a8a3e0965201df88bdfdff08f50e5c06c552b7 (patch)
treec29687748fa9d5e9bc335e11bf3d8446563184c3 /src/backend/access
parent149b2673c244b92b59411dd2292d6ddcfb03d5d4 (diff)
downloadpostgresql-a8a8a3e0965201df88bdfdff08f50e5c06c552b7.tar.gz
Efficient transaction-controlled synchronous replication.
If a standby is broadcasting reply messages and we have named one or more standbys in synchronous_standby_names then allow users who set synchronous_replication to wait for commit, which then provides strict data integrity guarantees. Design avoids sending and receiving transaction state information so minimises bookkeeping overheads. We synchronize with the highest priority standby that is connected and ready to synchronize. Other standbys can be defined to takeover in case of standby failure. This version has very strict behaviour; more relaxed options may be added at a later date. Simon Riggs and Fujii Masao, with reviews by Yeb Havinga, Jaime Casanova, Heikki Linnakangas and Robert Haas, plus the assistance of many other design reviewers.
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/transam/twophase.c25
-rw-r--r--src/backend/access/transam/xact.c11
2 files changed, 35 insertions, 1 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 287ad26698..729c7b72e0 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -56,6 +56,7 @@
#include "pg_trace.h"
#include "pgstat.h"
#include "replication/walsender.h"
+#include "replication/syncrep.h"
#include "storage/fd.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
@@ -1071,6 +1072,14 @@ EndPrepare(GlobalTransaction gxact)
END_CRIT_SECTION();
+ /*
+ * Wait for synchronous replication, if required.
+ *
+ * Note that at this stage we have marked the prepare, but still show as
+ * running in the procarray (twice!) and continue to hold locks.
+ */
+ SyncRepWaitForLSN(gxact->prepare_lsn);
+
records.tail = records.head = NULL;
}
@@ -2030,6 +2039,14 @@ RecordTransactionCommitPrepared(TransactionId xid,
MyProc->inCommit = false;
END_CRIT_SECTION();
+
+ /*
+ * Wait for synchronous replication, if required.
+ *
+ * Note that at this stage we have marked clog, but still show as
+ * running in the procarray and continue to hold locks.
+ */
+ SyncRepWaitForLSN(recptr);
}
/*
@@ -2109,4 +2126,12 @@ RecordTransactionAbortPrepared(TransactionId xid,
TransactionIdAbortTree(xid, nchildren, children);
END_CRIT_SECTION();
+
+ /*
+ * Wait for synchronous replication, if required.
+ *
+ * Note that at this stage we have marked clog, but still show as
+ * running in the procarray and continue to hold locks.
+ */
+ SyncRepWaitForLSN(recptr);
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4b407015df..c8b582cce8 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -37,6 +37,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/walsender.h"
+#include "replication/syncrep.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/lmgr.h"
@@ -1055,7 +1056,7 @@ RecordTransactionCommit(void)
* if all to-be-deleted tables are temporary though, since they are lost
* anyway if we crash.)
*/
- if ((wrote_xlog && XactSyncCommit) || forceSyncCommit || nrels > 0)
+ if ((wrote_xlog && XactSyncCommit) || forceSyncCommit || nrels > 0 || SyncRepRequested())
{
/*
* Synchronous commit case:
@@ -1125,6 +1126,14 @@ RecordTransactionCommit(void)
/* Compute latestXid while we have the child XIDs handy */
latestXid = TransactionIdLatest(xid, nchildren, children);
+ /*
+ * Wait for synchronous replication, if required.
+ *
+ * Note that at this stage we have marked clog, but still show as
+ * running in the procarray and continue to hold locks.
+ */
+ SyncRepWaitForLSN(XactLastRecEnd);
+
/* Reset XactLastRecEnd until the next transaction writes something */
XactLastRecEnd.xrecoff = 0;