Handle a race condition between portal close 'C' message and new request for
running the portal, as part of the next step of query execution

author     Pavan Deolasee <[email protected]>
           Wed, 17 Feb 2016 07:28:31 +0000 (12:58 +0530)
committer  Pavan Deolasee <[email protected]>
           Wed, 17 Feb 2016 07:28:31 +0000 (12:58 +0530)

A producer will unbind and remove the SharedQ once all consumers are done
reading pending data. It does not wait for the consumers to actually send the
close 'C' message. If the next step of the execution now recreates the SharedQ
with the same name (because it's the same RemoteSubplan being re-executed), and
if the close message arrives after that, but before the new producer gets a
chance to bind to the SharedQ, we end up marking future consumers of the new
SharedQ as 'DONE'. SharedQueueAcquire then incorrectly assumes that this is a
stale queue belonging to an earlier execution and gets stuck in an infinite wait.
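
As a rough illustration of the release-side guard in the patch below, here is a
minimal, self-contained C sketch (the ConsumerSlot type, the field values and
the printf reporting are made up for the example; only the cs_pid/cs_status
idea mirrors the real ConsState handling in squeue.c): a consumer slot whose
pid was never recorded is assumed to belong to the next execution and is left
alone.

    /*
     * Simplified sketch, not the real squeue.c code: a stale 'C' message must
     * not touch a consumer slot that the next execution has already recreated,
     * which shows up here as an unset pid.
     */
    #include <stdio.h>

    #define CONSUMER_ACTIVE 1
    #define CONSUMER_DONE   2

    typedef struct ConsumerSlot         /* simplified stand-in for ConsState */
    {
        int cs_node;                    /* node id of the consumer */
        int cs_pid;                     /* 0 => slot belongs to a future consumer */
        int cs_status;
    } ConsumerSlot;

    static void
    release_consumer(ConsumerSlot *cstate)
    {
        if (cstate->cs_pid == 0)
        {
            /* Slot recreated for the next execution of the portal: ignore. */
            printf("consumer node %d already released, ignoring\n", cstate->cs_node);
            return;
        }
        if (cstate->cs_status != CONSUMER_DONE)
            cstate->cs_status = CONSUMER_DONE;  /* tell the producer we are done */
    }

    int
    main(void)
    {
        ConsumerSlot fresh = { .cs_node = 2, .cs_pid = 0,    .cs_status = CONSUMER_ACTIVE };
        ConsumerSlot live  = { .cs_node = 3, .cs_pid = 4242, .cs_status = CONSUMER_ACTIVE };

        release_consumer(&fresh);       /* skipped: belongs to the new SharedQ */
        release_consumer(&live);        /* marked CONSUMER_DONE as usual */
        return 0;
    }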

Also, do not wait indefinitely for the old producer to unbind and remove a
stale queue; any further bugs in this area would otherwise cause infinite
loops. Instead, retry a fixed number of times and then let the query fail.
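
The retry logic added to SharedQueueAcquire() boils down to the pattern in this
hedged, standalone C sketch (old_queue_gone(), MAX_SQUEUE_RETRIES and the
simulated counter are placeholders for the example, not the real squeue.c API,
which re-checks the stale entry under SQueuesLock):

    /*
     * Sketch of the bounded retry: wait up to a fixed number of one-second
     * intervals for the old producer to go away, then fail instead of
     * looping forever.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define MAX_SQUEUE_RETRIES 10

    static int remaining = 3;           /* simulate the old producer finishing
                                         * after a few retries */
    static int
    old_queue_gone(void)
    {
        return --remaining <= 0;
    }

    static void
    acquire_with_retries(void)
    {
        int trycount = 0;

        while (!old_queue_gone())
        {
            trycount++;
            if (trycount >= MAX_SQUEUE_RETRIES)
            {
                /* Fail the query rather than spin forever on a stale queue. */
                fprintf(stderr, "couldn't resolve SQueue race after %d tries\n",
                        trycount);
                exit(EXIT_FAILURE);
            }
            sleep(1);                   /* give the old producer time to finish */
        }
        /* ... safe to (re)initialise and bind the new queue here ... */
    }

    int
    main(void)
    {
        acquire_with_retries();
        puts("queue acquired");
        return 0;
    }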

src/backend/executor/producerReceiver.c
src/backend/pgxc/squeue/squeue.c

index 145788a11b6f14f2c43d3008a85558b54a3f8f71..8dec452b6ae761b7bb59c9551e446e65e08d654e 100644 (file)
@@ -156,11 +156,13 @@ producerDestroyReceiver(DestReceiver *self)
                if (SharedQueueFinish(myState->squeue, myState->typeinfo,
                                                          myState->tstores) == 0)
                {
+                       elog(DEBUG3, "SharedQueueFinish returned 0 - freeing tstores");
                        pfree(myState->tstores);
                        myState->tstores = NULL;
                }
                else
                {
+                       elog(DEBUG2, "producerDestroyReceiver - sleeping for 10 seconds waiting for consumers to connect");
                        pg_usleep(10*1000*1000l);
                        /*
                         * Do not wait for consumers that was not even connected after 10
@@ -279,11 +281,15 @@ ProducerReceiverPushBuffers(DestReceiver *self)
                if (SharedQueueFinish(myState->squeue, myState->typeinfo,
                                                          myState->tstores) == 0)
                {
+                       elog(DEBUG3, "SharedQueueFinish returned 0, freeing tstores");
                        pfree(myState->tstores);
                        myState->tstores = NULL;
                }
                else
+               {
+                       elog(DEBUG3, "SharedQueueFinish returned non-zero value");
                        return false;
+               }
        }
        return true;
 }
index 8c975572949ea27e00d3b1b305a4e039be774770..5ede9d5373c3f4b014be8bec1a5148d0c6846e0e 100644 (file)
@@ -265,6 +265,7 @@ SharedQueueAcquire(const char *sqname, int ncons)
 {
        bool            found;
        SharedQueue sq;
+       int trycount = 0;
 
        Assert(IsConnFromDatanode());
        Assert(ncons > 0);
@@ -394,9 +395,13 @@ tryagain:
                        if (old_squeue)
                        {
                                LWLockRelease(SQueuesLock);
-                               pg_usleep(1L);
+                               pg_usleep(1000000L);
                                elog(DEBUG1, "SQueue race condition, give the old producer to "
                                                "finish the work and retry again");
+                               trycount++;
+                               if (trycount >= 10)
+                                       elog(ERROR, "Couldn't resolve SQueue race condition after"
+                                                       " %d tries", trycount);
                                goto tryagain;
                        }
 
@@ -1472,16 +1477,14 @@ SharedQueueRelease(const char *sqname)
                                sqname, sq->sq_nodeid, sq->sq_pid);
 
                /*
-                * Case if the shared queue was never bound.
-                * Just remove it from the hash table.
+                * If the SharedQ is not bound, we can't just remove it because
+                * somebody might have just created a fresh entry and is going to bind
+                * to it soon. We assume that the future producer will eventually
+                * release the SharedQ.
                 */
                if (sq->sq_nodeid == -1)
                {
-                       sq->sq_sync = NULL;
-                       sqsync->queue = NULL;
-                       if (hash_search(SharedQueues, sqname, HASH_REMOVE, NULL) != sq)
-                               elog(PANIC, "Shared queue data corruption");
-                       elog(DEBUG1, "SQueue %s, producer not bound - released SQueue", sqname);
+                       elog(DEBUG1, "SQueue %s, producer not bound", sqname);
                        LWLockRelease(SQueuesLock);
                        return;
                }
@@ -1505,7 +1508,30 @@ SharedQueueRelease(const char *sqname)
                                        elog(DEBUG1, "SQueue %s, consumer node %d, pid %d, "
                                                        "status %d",  sq->sq_key, cstate->cs_node,
                                                        cstate->cs_pid, cstate->cs_status);
-                                       if (cstate->cs_status != CONSUMER_DONE)
+
+                                       /*
+                                        * If the consumer pid is not set, we are looking at a race
+                                        * If the consumer pid is not set, we are looking at a race
+                                        * condition where the old producer (which supplied the
+                                        * tuples to this remote datanode) may have finished and
+                                        * marked all consumers as CONSUMER_EOF, the consumers
+                                        * themselves consumed all the tuples and marked
+                                        * themselves as CONSUMER_DONE. The old producer in that
+                                        * case may have actually removed the SharedQ from shared
+                                        * memory. But if a new execution for this same portal
+                                        * comes before the consumer sends a "Close Portal" message
+                                        * (which subsequently calls this function), we may end up
+                                        * corrupting state for the upcoming consumer for this new
+                                        * execution of the portal.
+                                        *
+                                        * It seems best to just ignore the release call in such
+                                        * cases.
+                                        */
+                                       if (cstate->cs_pid == 0)
+                                       {
+                                               elog(DEBUG1, "SQueue %s, consumer node %d, already released",
+                                                       sq->sq_key, cstate->cs_node);
+                                       }
+                                       else if (cstate->cs_status != CONSUMER_DONE)
                                        {
                                                /* Inform producer the consumer have done the job */
                                                cstate->cs_status = CONSUMER_DONE;