Partial group locking implementation.
authorRobert Haas <[email protected]>
Sat, 3 Oct 2015 17:34:35 +0000 (13:34 -0400)
committerRobert Haas <[email protected]>
Wed, 28 Oct 2015 13:43:45 +0000 (14:43 +0100)
This doesn't touch deadlock.c but it's enough to get the regression
tests working with stuff pushed under Gather nodes.

src/backend/access/transam/parallel.c
src/backend/storage/lmgr/lock.c
src/backend/storage/lmgr/proc.c
src/include/storage/lock.h
src/include/storage/proc.h

index 35a873de6bac4aa5425ce173fed1eaa5e64d1bb1..171356f3d5f3aef71f2494e587140ff419726e42 100644 (file)
@@ -398,6 +398,9 @@ LaunchParallelWorkers(ParallelContext *pcxt)
        if (pcxt->nworkers == 0)
                return;
 
+       /* We need to be a lock group leader. */
+       BecomeLockGroupLeader();
+
        /* If we do have workers, we'd better have a DSM segment. */
        Assert(pcxt->seg != NULL);
 
@@ -952,6 +955,19 @@ ParallelWorkerMain(Datum main_arg)
         * backend-local state to match the original backend.
         */
 
+       /*
+        * Join locking group.  We must do this before anything that could try
+        * to acquire a heavyweight lock, because any heavyweight locks acquired
+        * to this point could block either directly against the parallel group
+        * leader or against some process which in turn waits for a lock that
+        * conflicts with the parallel group leader, causing an undetected
+        * deadlock.  (If we can't join the lock group, the leader has gone away,
+        * so just exit quietly.)
+        */
+       if (!BecomeLockGroupMember(fps->parallel_master_pgproc,
+                                                          fps->parallel_master_pid))
+               return;
+
        /*
         * Load libraries that were loaded by original backend.  We want to do
         * this before restoring GUCs, because the libraries might define custom
index 76fc615cd594ca098b4470bb78d9729bb554d642..de6a05e0cfe7002e190f7b28c3c9b2dc6f185928 100644 (file)
@@ -35,6 +35,7 @@
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/twophase_rmgr.h"
+#include "access/xact.h"
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
@@ -706,6 +707,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
        lockMethodTable = LockMethods[lockmethodid];
        if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
                elog(ERROR, "unrecognized lock mode: %d", lockmode);
+       Assert(!IsInParallelMode() || MyProc->lockGroupLeader != NULL);
 
        if (RecoveryInProgress() && !InRecovery &&
                (locktag->locktag_type == LOCKTAG_OBJECT ||
@@ -1136,6 +1138,18 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
        {
                uint32          partition = LockHashPartition(hashcode);
 
+               /*
+                * It might seem unsafe to access proclock->groupLeader without a lock,
+                * but it's not really.  Either we are initializing a proclock on our
+                * own behalf, in which case our group leader isn't changing because
+                * the group leader for a process can only ever be changed by the
+                * process itself; or else we are transferring a fast-path lock to the
+                * main lock table, in which case that process can't change it's lock
+                * group leader without first releasing all of its locks (and in
+                * particular the one we are currently transferring).
+                */
+               proclock->groupLeader = proc->lockGroupLeader != NULL ?
+                       proc->lockGroupLeader : proc;
                proclock->holdMask = 0;
                proclock->releaseMask = 0;
                /* Add proclock to appropriate lists */
@@ -1255,9 +1269,10 @@ RemoveLocalLock(LOCALLOCK *locallock)
  * NOTES:
  *             Here's what makes this complicated: one process's locks don't
  * conflict with one another, no matter what purpose they are held for
- * (eg, session and transaction locks do not conflict).
- * So, we must subtract off our own locks when determining whether the
- * requested new lock conflicts with those already held.
+ * (eg, session and transaction locks do not conflict).  Nor do the locks
+ * of one process in a lock group conflict with those of another process in
+ * the same group.  So, we must subtract off these locks when determining
+ * whether the requested new lock conflicts with those already held.
  */
 int
 LockCheckConflicts(LockMethod lockMethodTable,
@@ -1267,8 +1282,12 @@ LockCheckConflicts(LockMethod lockMethodTable,
 {
        int                     numLockModes = lockMethodTable->numLockModes;
        LOCKMASK        myLocks;
-       LOCKMASK        otherLocks;
+       int                     conflictMask = lockMethodTable->conflictTab[lockmode];
+       int                     conflictsRemaining[MAX_LOCKMODES];
+       int                     totalConflictsRemaining = 0;
        int                     i;
+       SHM_QUEUE  *procLocks;
+       PROCLOCK   *otherproclock;
 
        /*
         * first check for global conflicts: If no locks conflict with my request,
@@ -1279,40 +1298,91 @@ LockCheckConflicts(LockMethod lockMethodTable,
         * type of lock that conflicts with request.   Bitwise compare tells if
         * there is a conflict.
         */
-       if (!(lockMethodTable->conflictTab[lockmode] & lock->grantMask))
+       if (!(conflictMask & lock->grantMask))
        {
                PROCLOCK_PRINT("LockCheckConflicts: no conflict", proclock);
                return STATUS_OK;
        }
 
        /*
-        * Rats.  Something conflicts.  But it could still be my own lock. We have
-        * to construct a conflict mask that does not reflect our own locks, but
-        * only lock types held by other processes.
+        * Rats.  Something conflicts.  But it could still be my own lock, or
+        * a lock held by another member of my locking group.  First, figure out
+        * how many conflicts remain after subtracting out any locks I hold
+        * myself.
         */
        myLocks = proclock->holdMask;
-       otherLocks = 0;
        for (i = 1; i <= numLockModes; i++)
        {
-               int                     myHolding = (myLocks & LOCKBIT_ON(i)) ? 1 : 0;
+               if ((conflictMask & LOCKBIT_ON(i)) == 0)
+               {
+                       conflictsRemaining[i] = 0;
+                       continue;
+               }
+               conflictsRemaining[i] = lock->granted[i];
+               if (myLocks & LOCKBIT_ON(i))
+                       --conflictsRemaining[i];
+               totalConflictsRemaining += conflictsRemaining[i];
+       }
 
-               if (lock->granted[i] > myHolding)
-                       otherLocks |= LOCKBIT_ON(i);
+       /* If no conflicts remain, we get the lock. */
+       if (totalConflictsRemaining == 0)
+       {
+               PROCLOCK_PRINT("LockCheckConflicts: resolved (simple)", proclock);
+               return STATUS_OK;
+       }
+
+       /* If no group locking, it's definitely a conflict. */
+       if (proclock->groupLeader == MyProc && MyProc->lockGroupLeader == NULL)
+       {
+               Assert(proclock->tag.myProc == MyProc);
+               PROCLOCK_PRINT("LockCheckConflicts: conflicting (simple)",
+                                          proclock);
+               return STATUS_FOUND;
        }
 
        /*
-        * now check again for conflicts.  'otherLocks' describes the types of
-        * locks held by other processes.  If one of these conflicts with the kind
-        * of lock that I want, there is a conflict and I have to sleep.
+        * Locks held in conflicting modes by members of our own lock group are
+        * not real conflicts; we can subtract those out and see if we still have
+        * a conflict.  This is O(N) in the number of processes holding or awaiting
+        * locks on this object.  We could improve that by making the shared memory
+        * state more complex (and larger) but it doesn't seem worth it.
         */
-       if (!(lockMethodTable->conflictTab[lockmode] & otherLocks))
+       procLocks = &(lock->procLocks);
+       otherproclock = (PROCLOCK *)
+               SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink));
+       while (otherproclock != NULL)
        {
-               /* no conflict. OK to get the lock */
-               PROCLOCK_PRINT("LockCheckConflicts: resolved", proclock);
-               return STATUS_OK;
+               if (proclock != otherproclock &&
+                       proclock->groupLeader == otherproclock->groupLeader &&
+                       (otherproclock->holdMask & conflictMask) != 0)
+               {
+                       int     intersectMask = otherproclock->holdMask & conflictMask;
+
+                       for (i = 1; i <= numLockModes; i++)
+                       {
+                               if ((intersectMask & LOCKBIT_ON(i)) != 0)
+                               {
+                                       if (conflictsRemaining[i] <= 0)
+                                               elog(PANIC, "proclocks held do not match lock");
+                                       conflictsRemaining[i]--;
+                                       totalConflictsRemaining--;
+                               }
+                       }
+
+                       if (totalConflictsRemaining == 0)
+                       {
+                               PROCLOCK_PRINT("LockCheckConflicts: resolved (group)",
+                                                          proclock);
+                               return STATUS_OK;
+                       }
+               }
+               otherproclock = (PROCLOCK *)
+                       SHMQueueNext(procLocks, &otherproclock->lockLink,
+                                                offsetof(PROCLOCK, lockLink));
        }
 
-       PROCLOCK_PRINT("LockCheckConflicts: conflicting", proclock);
+       /* Nope, it's a real conflict. */
+       PROCLOCK_PRINT("LockCheckConflicts: conflicting (group)", proclock);
        return STATUS_FOUND;
 }
 
@@ -3095,6 +3165,10 @@ PostPrepare_Locks(TransactionId xid)
        PROCLOCKTAG proclocktag;
        int                     partition;
 
+       /* Can't prepare a lock group follower. */
+       Assert(MyProc->lockGroupLeader == NULL ||
+                  MyProc->lockGroupLeader == MyProc);
+
        /* This is a critical section: any error means big trouble */
        START_CRIT_SECTION();
 
@@ -3238,6 +3312,13 @@ PostPrepare_Locks(TransactionId xid)
                        proclocktag.myLock = lock;
                        proclocktag.myProc = newproc;
 
+                       /*
+                        * Update groupLeader pointer to point to the new proc.  (We'd
+                        * better not be a member of somebody else's lock group!)
+                        */
+                       Assert(proclock->groupLeader == proclock->tag.myProc);
+                       proclock->groupLeader = newproc;
+
                        /*
                         * Update the proclock.  We should not find any existing entry for
                         * the same hash key, since there can be only one entry for any
@@ -3785,6 +3866,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
         */
        if (!found)
        {
+               Assert(proc->lockGroupLeader == NULL);
+               proclock->groupLeader = proc;
                proclock->holdMask = 0;
                proclock->releaseMask = 0;
                /* Add proclock to appropriate lists */
index bb10c1bed0077b9a06a6b5b4a2555c0bfe6601e8..a848d8b702d34fc1aaae95079c446433788d7550 100644 (file)
@@ -397,6 +397,11 @@ InitProcess(void)
        MyProc->backendLatestXid = InvalidTransactionId;
        pg_atomic_init_u32(&MyProc->nextClearXidElem, INVALID_PGPROCNO);
 
+       /* Check that group locking fields are in a proper initial state. */
+       Assert(MyProc->lockGroupLeaderIdentifier == 0);
+       Assert(MyProc->lockGroupLeader == NULL);
+       Assert(MyProc->lockGroupSize == 0);
+
        /*
         * Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
         * on it.  That allows us to repoint the process latch, which so far
@@ -556,6 +561,11 @@ InitAuxiliaryProcess(void)
        OwnLatch(&MyProc->procLatch);
        SwitchToSharedLatch();
 
+       /* Check that group locking fields are in a proper initial state. */
+       Assert(MyProc->lockGroupLeaderIdentifier == 0);
+       Assert(MyProc->lockGroupLeader == NULL);
+       Assert(MyProc->lockGroupSize == 0);
+
        /*
         * We might be reusing a semaphore that belonged to a failed process. So
         * be careful and reinitialize its value here.  (This is not strictly
@@ -793,6 +803,33 @@ ProcKill(int code, Datum arg)
        if (MyReplicationSlot != NULL)
                ReplicationSlotRelease();
 
+       /* Detach from any lock group of which we are a member. */
+       if (MyProc->lockGroupLeader != NULL)
+       {
+               PGPROC     *leader = MyProc->lockGroupLeader;
+
+               LWLockAcquire(leader->backendLock, LW_EXCLUSIVE);
+               Assert(leader->lockGroupSize > 0);
+               if (--leader->lockGroupSize == 0)
+               {
+                       leader->lockGroupLeaderIdentifier = 0;
+                       leader->lockGroupLeader = NULL;
+                       if (leader != MyProc)
+                       {
+                               procgloballist = leader->procgloballist;
+
+                               /* Leader exited first; return its PGPROC. */
+                               SpinLockAcquire(ProcStructLock);
+                               leader->links.next = (SHM_QUEUE *) *procgloballist;
+                               *procgloballist = leader;
+                               SpinLockRelease(ProcStructLock);
+                       }
+               }
+               else if (leader != MyProc)
+                       MyProc->lockGroupLeader = NULL;
+               LWLockRelease(leader->backendLock);
+       }
+
        /*
         * Reset MyLatch to the process local one.  This is so that signal
         * handlers et al can continue using the latch after the shared latch
@@ -807,9 +844,20 @@ ProcKill(int code, Datum arg)
        procgloballist = proc->procgloballist;
        SpinLockAcquire(ProcStructLock);
 
-       /* Return PGPROC structure (and semaphore) to appropriate freelist */
-       proc->links.next = (SHM_QUEUE *) *procgloballist;
-       *procgloballist = proc;
+       /*
+        * If we're still a member of a locking group, that means we're a leader
+        * which has somehow exited before its children.  The last remaining child
+        * will release our PGPROC.  Otherwise, release it now.
+        */
+       if (proc->lockGroupLeader == NULL)
+       {
+               /* Since lockGroupLeader is NULL, lockGroupSize should be 0. */
+               Assert(proc->lockGroupSize == 0);
+
+               /* Return PGPROC structure (and semaphore) to appropriate freelist */
+               proc->links.next = (SHM_QUEUE *) *procgloballist;
+               *procgloballist = proc;
+       }
 
        /* Update shared estimate of spins_per_delay */
        ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
@@ -942,8 +990,30 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
        bool            allow_autovacuum_cancel = true;
        int                     myWaitStatus;
        PGPROC     *proc;
+       PGPROC     *leader = MyProc->lockGroupLeader;
        int                     i;
 
+       /*
+        * If group locking is in use, locks held my members of my locking group
+        * need to be included in myHeldLocks.
+        */
+       if (leader != NULL)
+       {
+               SHM_QUEUE  *procLocks = &(lock->procLocks);
+               PROCLOCK   *otherproclock;
+
+               otherproclock = (PROCLOCK *)
+                       SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink));
+               while (otherproclock != NULL)
+               {
+                       if (otherproclock->groupLeader == leader)
+                               myHeldLocks |= otherproclock->holdMask;
+                       otherproclock = (PROCLOCK *)
+                               SHMQueueNext(procLocks, &otherproclock->lockLink,
+                                                        offsetof(PROCLOCK, lockLink));
+               }
+       }
+
        /*
         * Determine where to add myself in the wait queue.
         *
@@ -968,6 +1038,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
                proc = (PGPROC *) waitQueue->links.next;
                for (i = 0; i < waitQueue->size; i++)
                {
+                       /*
+                        * If we're part of the same locking group as this waiter, its
+                        * locks neither conflict with ours nor contribute to aheadRequsts.
+                        */
+                       if (leader != NULL && leader == proc->lockGroupLeader)
+                       {
+                               proc = (PGPROC *) proc->links.next;
+                               continue;
+                       }
                        /* Must he wait for me? */
                        if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
                        {
@@ -1658,3 +1737,61 @@ ProcSendSignal(int pid)
                SetLatch(&proc->procLatch);
        }
 }
+
+/*
+ * BecomeLockGroupLeader - designate process as lock group leader
+ *
+ * Once this function has returned, other processes can join the lock group
+ * by calling BecomeLockGroupMember.
+ */
+void
+BecomeLockGroupLeader(void)
+{
+       /* If we already did it, we don't need to do it again. */
+       if (MyProc->lockGroupLeader == MyProc)
+               return;
+
+       /* We had better not be a follower. */
+       Assert(MyProc->lockGroupLeader == NULL);
+
+       /* Create single-member group, containing only ourselves. */
+       LWLockAcquire(MyProc->backendLock, LW_EXCLUSIVE);
+       MyProc->lockGroupLeader = MyProc;
+       MyProc->lockGroupLeaderIdentifier = MyProcPid;
+       MyProc->lockGroupSize = 1;
+       LWLockRelease(MyProc->backendLock);
+}
+
+/*
+ * BecomeLockGroupMember - designate process as lock group member
+ *
+ * This is pretty straightforward except for the possibility that the leader
+ * whose group we're trying to join might exit before we manage to do so;
+ * and the PGPROC might get recycled for an unrelated process.  To avoid
+ * that, we require the caller to pass the PID of the intended PGPROC as
+ * an interlock.  Returns true if we successfully join the intended lock
+ * group, and false if not.
+ */
+bool
+BecomeLockGroupMember(PGPROC *leader, int pid)
+{
+       bool    ok = false;
+
+       /* Group leader can't become member of group */
+       Assert(MyProc != leader);
+
+       /* PID must be valid. */
+       Assert(pid != 0);
+
+       /* Try to join the group. */
+       LWLockAcquire(leader->backendLock, LW_EXCLUSIVE);
+       if (leader->lockGroupLeaderIdentifier == pid)
+       {
+               ok = true;
+               leader->lockGroupSize++;
+               MyProc->lockGroupLeader = leader;
+       }
+       LWLockRelease(leader->backendLock);
+
+       return ok;
+}
index a9cd08c5276aae45b659b8c7d15532d593acacf7..fa81003700461b9a6c9bd50713f1ed74d52f835c 100644 (file)
@@ -346,6 +346,7 @@ typedef struct PROCLOCK
        PROCLOCKTAG tag;                        /* unique identifier of proclock object */
 
        /* data */
+       PGPROC     *groupLeader;        /* group leader, or NULL if no lock group */
        LOCKMASK        holdMask;               /* bitmask for lock types currently held */
        LOCKMASK        releaseMask;    /* bitmask for lock types to be released */
        SHM_QUEUE       lockLink;               /* list link in LOCK's list of proclocks */
@@ -457,7 +458,6 @@ typedef enum
                                                                 * worker */
 } DeadLockState;
 
-
 /*
  * The lockmgr's shared hash tables are partitioned to reduce contention.
  * To determine which partition a given locktag belongs to, compute the tag's
index 3d680171782029ae854c66116566ff8f5c1f21e1..591e4aea86fe94b0c06ffa7bc1f0973920d17d4b 100644 (file)
@@ -155,6 +155,10 @@ struct PGPROC
        bool            fpVXIDLock;             /* are we holding a fast-path VXID lock? */
        LocalTransactionId fpLocalTransactionId;        /* lxid for fast-path VXID
                                                                                                 * lock */
+       /* Support for lock groups. */
+       int                     lockGroupLeaderIdentifier;      /* MyProcPid, if I'm a leader */
+       PGPROC     *lockGroupLeader;    /* lock group leader, if I'm a follower */
+       int                     lockGroupSize;          /* # of members, if I'm a leader */
 };
 
 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
@@ -272,4 +276,7 @@ extern void LockErrorCleanup(void);
 extern void ProcWaitForSignal(void);
 extern void ProcSendSignal(int pid);
 
+extern void BecomeLockGroupLeader(void);
+extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
+
 #endif   /* PROC_H */