Overhaul CREATE BARRIER handling
authorPavan Deolasee <[email protected]>
Fri, 11 Jan 2019 07:21:05 +0000 (12:51 +0530)
committerPavan Deolasee <[email protected]>
Fri, 11 Jan 2019 09:31:24 +0000 (15:01 +0530)
Instead of using a low-level LWLock, the new code uses a high-level advisory
lock (though with some tweaks). This ensures that the command doesn't have to
hold a low-level lock across network roundtrips as the old implementation
required. This also ensures that the command responds to various timeouts such
as statement_timeout and lock_timeout instead of remaining blocked forever.

While reworking, I also realised that the current implementation wasn't
actually doing the right thing to stop in-flight 2PC transactions while a
BARRIER is being generated. This is because we were merely acquiring the lock
when a COMMIT TRANSACTION command is sent to the remote nodes. But that command
doesn't participate in a 2PC. So we now correctly acquire the lock before 2PC is started.
The lock is released with the transaction and hence no special handling is
required to release the lock correctly. (This also takes care of any errors
and/or timeouts while holding the lock.)

We should really add some test coverage for BARRIER, but this commit does not
do that.

src/backend/access/transam/xact.c
src/backend/pgxc/barrier/barrier.c
src/backend/pgxc/pool/execRemote.c
src/backend/tcop/postgres.c
src/include/pgxc/barrier.h

index a38bab582395ac7e088f4ff05f00ab91512c6d80..2de7ec0a98aa742633f37779a5c537668f5e4e22 100644 (file)
@@ -57,6 +57,7 @@
 #include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "pgxc/barrier.h"
 #include "replication/logical.h"
 #include "replication/logicallauncher.h"
 #include "replication/origin.h"
@@ -2375,6 +2376,18 @@ CommitTransaction(void)
        s->topGlobalTransansactionId = s->transactionId;
        if (IS_PGXC_LOCAL_COORDINATOR)
        {
+               /*
+                * First ensure that there is no CREATE BARRIER request in-progress and
+                * also block any further request until we finish the 2PC.
+                *
+                * The lock gets automatically released when the transaction ends.
+                * Since we ensure that the local transaction is finished only after
+                * the 2PC is run completely on the remote nodes, this seems
+                * sufficient. The lock also gets released if an error occurs from
+                * this point onwards.
+                */
+               BarrierLockAcquireForXact();
+
                XactLocalNodePrepared = false;
                if (savePrepareGID)
                {
@@ -2477,12 +2490,13 @@ CommitTransaction(void)
                 * ereport and we will run error recovery as part of AbortTransaction
                 */
                PreCommit_Remote(savePrepareGID, saveNodeString, XactLocalNodePrepared);
+
                /*
                 * Now that all the remote nodes have successfully prepared and
                 * commited, commit the local transaction as well. Remember, any errors
                 * before this point would have been reported via ereport. The fact
                 * that we are here shows that the transaction has been committed
-                * successfully on the remote nodes
+                * successfully on the remote nodes.
                 */
                if (XactLocalNodePrepared)
                {
index a06b2d9590d341e3babd370a4b20f0b08cdf7cf6..ad3d67cf3e5fbce7c356cd7bbf676279a7522aaf 100644 (file)
@@ -25,7 +25,7 @@
 #include "pgxc/pgxc.h"
 #include "nodes/nodes.h"
 #include "pgxc/pgxcnode.h"
-#include "storage/lwlock.h"
+#include "storage/lock.h"
 #include "tcop/dest.h"
 
 static const char *generate_barrier_id(const char *id);
@@ -33,6 +33,88 @@ static PGXCNodeAllHandles *PrepareBarrier(const char *id);
 static void ExecuteBarrier(const char *id);
 static void EndBarrier(PGXCNodeAllHandles *handles, const char *id);
 
+/*
+ * Use some random values to uniquely identify the barrier lock.
+ *
+ * XXX The chance of this conflicting with anything real is so small that it
+ * doesn't seem worth doing anything special to either detect or avoid
+ * conflicts.
+ */
+#define BarrierLockTagMagic1   1696412986
+#define BarrierLockTagMagic2   155831266
+#define BarrierLockTagMagic3   227185880
+#define BarrierLockTagMagic4   21676
+
+/*
+ * This is the same as an advisory lock, but we use DEFAULT_LOCKMETHOD to ensure
+ * that the lock is released in case of ereport.
+ */
+#define SET_BARRIER_LOCKTAG(locktag) \
+       ((locktag).locktag_field1 = BarrierLockTagMagic1, \
+        (locktag).locktag_field2 = BarrierLockTagMagic2, \
+        (locktag).locktag_field3 = BarrierLockTagMagic3, \
+        (locktag).locktag_field4 = BarrierLockTagMagic4, \
+        (locktag).locktag_type = LOCKTAG_ADVISORY, \
+        (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
+/*
+ * Acquire the BarrierLock while generating a BARRIER. This locks out all other
+ * readers-writers. The lock is only obtained in a transaction boundary
+ * since we don't expect the callers to cross that boundary.
+ */
+void
+BarrierLockAcquireForBarrier(void)
+{
+       LOCKTAG                 barrierlock;
+
+       SET_BARRIER_LOCKTAG(barrierlock);
+       LockAcquire(&barrierlock, AccessExclusiveLock, false, false);
+}
+
+/*
+ * Acquire the BarrierLock while starting a 2PC. The lock is obtained in a
+ * Share mode to ensure that multiple backends can run 2PC in parallel, but
+ * conflict with any ongoing CREATE BARRIER request.
+ *
+ * It's enough to obtain the lock in a transaction boundary because even though
+ * the local transaction may be prepared while running 2PC, the lock is not
+ * released until the prepared transaction is actually finished. That happens
+ * only after the 2PC is finished on all the remote nodes. So that seems enough
+ * to block in-flight 2PC while generating a BARRIER.
+ */
+void
+BarrierLockAcquireForXact(void)
+{
+       LOCKTAG                 barrierlock;
+
+       SET_BARRIER_LOCKTAG(barrierlock);
+       LockAcquire(&barrierlock, ShareLock, false, false);
+}
+/*
+ * Release the BarrierLock held in AccessExclusiveLock mode for a BARRIER.
+ */
+void
+BarrierLockReleaseForBarrier(void)
+{
+       LOCKTAG                 barrierlock;
+
+       SET_BARRIER_LOCKTAG(barrierlock);
+       LockRelease(&barrierlock, AccessExclusiveLock, false);
+}
+
+/*
+ * Currently unused since the lock gets auto released once the transaction
+ * ends.
+ */
+void
+BarrierLockReleaseForXact(void)
+{
+       LOCKTAG                 barrierlock;
+
+       SET_BARRIER_LOCKTAG(barrierlock);
+       LockRelease(&barrierlock, ShareLock, false);
+}
+
 /*
  * Prepare ourselves for an incoming BARRIER. We must disable all new 2PC
  * commits and let the ongoing commits to finish. We then remember the
@@ -45,15 +127,11 @@ static void EndBarrier(PGXCNodeAllHandles *handles, const char *id);
  * new 2PC and there can not be any 2PC in-progress. This technique would
  * rely on assumption that an exclusive lock requester is not starved by
  * share lock requesters.
- *
- * Note: To ensure that the 2PC are not blocked for a long time, we should
- * set a timeout. The lock should be release after the timeout and the
- * barrier should be canceled.
  */
 void
 ProcessCreateBarrierPrepare(const char *id)
 {
-       StringInfoData buf;
+       StringInfoData  buf;
 
        if (!IS_PGXC_REMOTE_COORDINATOR)
                ereport(ERROR,
@@ -61,17 +139,13 @@ ProcessCreateBarrierPrepare(const char *id)
                                 errmsg("The CREATE BARRIER PREPARE message is expected to "
                                                "arrive at a Coordinator from another Coordinator")));
 
-       LWLockAcquire(BarrierLock, LW_EXCLUSIVE);
+       /* Acquire the BarrierLock */
+       BarrierLockAcquireForBarrier();
 
        pq_beginmessage(&buf, 'b');
        pq_sendstring(&buf, id);
        pq_endmessage(&buf);
        pq_flush();
-
-       /*
-        * TODO Start a timer to terminate the pending barrier after a specified
-        * timeout
-        */
 }
 
 /*
@@ -81,7 +155,7 @@ ProcessCreateBarrierPrepare(const char *id)
 void
 ProcessCreateBarrierEnd(const char *id)
 {
-       StringInfoData buf;
+       StringInfoData  buf;
 
        if (!IS_PGXC_REMOTE_COORDINATOR)
                ereport(ERROR,
@@ -89,16 +163,13 @@ ProcessCreateBarrierEnd(const char *id)
                                 errmsg("The CREATE BARRIER END message is expected to "
                                                "arrive at a Coordinator from another Coordinator")));
 
-       LWLockRelease(BarrierLock);
+       /* Release the lock. */
+       BarrierLockReleaseForBarrier();
 
        pq_beginmessage(&buf, 'b');
        pq_sendstring(&buf, id);
        pq_endmessage(&buf);
        pq_flush();
-
-       /*
-        * TODO Stop the timer
-        */
 }
 
 /*
@@ -220,6 +291,9 @@ CheckBarrierCommandStatus(PGXCNodeAllHandles *conn_handles, const char *id,
        for (conn = 0; conn < count; conn++)
        {
                PGXCNodeHandle *handle;
+               ResponseCombiner combiner;
+
+               InitResponseCombiner(&combiner, 1, COMBINE_TYPE_NONE);
 
                if (conn < conn_handles->co_conn_count)
                        handle = conn_handles->coord_handles[conn];
@@ -231,7 +305,7 @@ CheckBarrierCommandStatus(PGXCNodeAllHandles *conn_handles, const char *id,
                                        (errcode(ERRCODE_INTERNAL_ERROR),
                                         errmsg("Failed to receive response from the remote side")));
 
-               if (handle_response(handle, NULL) != RESPONSE_BARRIER_OK)
+               if (handle_response(handle, &combiner) != RESPONSE_BARRIER_OK)
                        ereport(ERROR,
                                        (errcode(ERRCODE_INTERNAL_ERROR),
                                         errmsg("CREATE BARRIER PREPARE command failed "
@@ -311,26 +385,22 @@ PrepareBarrier(const char *id)
        elog(DEBUG2, "Preparing Coordinators for BARRIER");
 
        /*
-        * Send a CREATE BARRIER PREPARE message to all the Coordinators. We should
-        * send an asynchronous request so that we can disable local commits and
-        * then wait for the remote Coordinators to finish the work
+        * Send a CREATE BARRIER PREPARE message to all the Coordinators. We send
+        * an asynchronous request so that we can disable local commits and then
+        * wait for the remote Coordinators to finish the work
         */
        coord_handles = SendBarrierPrepareRequest(GetAllCoordNodes(), id);
 
        /*
-        * Disable local commits
+        * Now disable 2PC originating at this coordinator.
         */
-       LWLockAcquire(BarrierLock, LW_EXCLUSIVE);
+       BarrierLockAcquireForBarrier();
 
        elog(DEBUG2, "Disabled 2PC commits originating at the driving Coordinator");
 
-       /*
-        * TODO Start a timer to cancel the barrier request in case of a timeout
-        */
-
        /*
         * Local in-flight commits are now over. Check status of the remote
-        * Coordinators
+        * Coordinators.
         */
        CheckBarrierCommandStatus(coord_handles, id, "PREPARE");
 
@@ -427,8 +497,9 @@ static void
 EndBarrier(PGXCNodeAllHandles *prepared_handles, const char *id)
 {
        /* Resume 2PC locally */
-       LWLockRelease(BarrierLock);
+       BarrierLockReleaseForBarrier();
 
+       /* and also on the remote coordinators. */
        SendBarrierEndRequest(prepared_handles, id);
 
        CheckBarrierCommandStatus(prepared_handles, id, "END");
index b05943a30481cc594d2f0ef14cb07275102d9fb3..7e5ff91f07355c4c01a7c5b13da584f7d8e209ae 100644 (file)
@@ -2516,8 +2516,6 @@ prepare_err:
 
 /*
  * Commit transactions on remote nodes.
- * If barrier lock is set wait while it is released.
- * Release remote connection after completion.
  */
 static void
 pgxc_node_remote_commit(void)
@@ -2532,16 +2530,6 @@ pgxc_node_remote_commit(void)
 
        SetSendCommandId(false);
 
-       /*
-        * Barrier:
-        *
-        * We should acquire the BarrierLock in SHARE mode here to ensure that
-        * there are no in-progress barrier at this point. This mechanism would
-        * work as long as LWLock mechanism does not starve a EXCLUSIVE lock
-        * requester
-        */
-       LWLockAcquire(BarrierLock, LW_SHARED);
-
        for (i = 0; i < handles->dn_conn_count; i++)
        {
                PGXCNodeHandle *conn = handles->datanode_handles[i];
@@ -2614,11 +2602,6 @@ pgxc_node_remote_commit(void)
                }
        }
 
-       /*
-        * Release the BarrierLock.
-        */
-       LWLockRelease(BarrierLock);
-
        if (conn_count)
        {
                InitResponseCombiner(&combiner, conn_count, COMBINE_TYPE_NONE);
index d62717438b7a0cb267fcee5d43fa19b026b9c896..6afa2e677b12c5247ce034f58270245c6656d613 100644 (file)
@@ -5072,11 +5072,23 @@ PostgresMain(int argc, char *argv[],
                                        switch (command)
                                        {
                                                case CREATE_BARRIER_PREPARE:
+                                                       /*
+                                                        * Start a transaction to ensure errors are handled
+                                                        * correctly.
+                                                        */
+                                                       start_xact_command();
+
                                                        ProcessCreateBarrierPrepare(id);
                                                        break;
 
                                                case CREATE_BARRIER_END:
                                                        ProcessCreateBarrierEnd(id);
+
+                                                       /*
+                                                        * Matches start_xact_command() during the PREPARE
+                                                        * stage.
+                                                        */
+                                                       finish_xact_command();
                                                        break;
 
                                                case CREATE_BARRIER_EXECUTE:
index 6ae842acd595f370604b72cff6d89346050c9188..b7dd387d8d3312ccdf8d105471bf45abe0145255 100644 (file)
@@ -33,6 +33,11 @@ typedef struct xl_barrier
 
 #define XLOG_BARRIER_CREATE    0x00
 
+extern void BarrierLockAcquireForBarrier(void);
+extern void BarrierLockReleaseForBarrier(void);
+extern void BarrierLockAcquireForXact(void);
+extern void BarrierLockReleaseForXact(void);
+
 extern void ProcessCreateBarrierPrepare(const char *id);
 extern void ProcessCreateBarrierEnd(const char *id);
 extern void ProcessCreateBarrierExecute(const char *id);