Use chash for buftable stuff.
author: Robert Haas <[email protected]>
Mon, 13 Oct 2014 20:26:45 +0000 (16:26 -0400)
committer: Robert Haas <[email protected]>
Mon, 13 Oct 2014 20:26:45 +0000 (16:26 -0400)
src/backend/storage/buffer/buf_table.c
src/backend/storage/buffer/bufmgr.c
src/include/storage/buf_internals.h
src/include/storage/lwlock.h

index 7a38f2f15099520c7384725030282841ba34c83f..092cf8fe43727d3a50ece7e101afd43293c38303 100644 (file)
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/buf_internals.h"
+#include "utils/chash.h"
 
 
 /* entry for buffer lookup hashtable */
@@ -32,8 +34,13 @@ typedef struct
    int         id;             /* Associated buffer ID */
 } BufferLookupEnt;
 
-static HTAB *SharedBufHash;
-
+static CHashDescriptor SharedBufDescriptor = {
+   "buffer lookup table",
+   0,
+   sizeof(BufferLookupEnt),
+   sizeof(BufferTag)
+};
+static CHashTable SharedBufHash;
 
 /*
  * Estimate space needed for mapping hashtable
@@ -42,7 +49,13 @@ static HTAB *SharedBufHash;
 Size
 BufTableShmemSize(int size)
 {
-   return hash_estimate_size(size, sizeof(BufferLookupEnt));
+   if (SharedBufHash == NULL)
+   {
+       SharedBufDescriptor.capacity = size;
+       SharedBufHash = CHashBootstrap(&SharedBufDescriptor);
+   }
+
+   return CHashEstimateSize(SharedBufHash);
 }
 
 /*
@@ -52,59 +65,29 @@ BufTableShmemSize(int size)
 void
 InitBufTable(int size)
 {
-   HASHCTL     info;
-
-   /* assume no locking is needed yet */
-
-   /* BufferTag maps to Buffer */
-   info.keysize = sizeof(BufferTag);
-   info.entrysize = sizeof(BufferLookupEnt);
-   info.hash = tag_hash;
-   info.num_partitions = NUM_BUFFER_PARTITIONS;
-
-   SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
-                                 size, size,
-                                 &info,
-                                 HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
-}
-
-/*
- * BufTableHashCode
- *     Compute the hash code associated with a BufferTag
- *
- * This must be passed to the lookup/insert/delete routines along with the
- * tag.  We do it like this because the callers need to know the hash code
- * in order to determine which buffer partition to lock, and we don't want
- * to do the hash computation twice (hash_any is a bit slow).
- */
-uint32
-BufTableHashCode(BufferTag *tagPtr)
-{
-   return get_hash_value(SharedBufHash, (void *) tagPtr);
+   if (SharedBufHash == NULL || !IsUnderPostmaster)
+   {
+       Assert(SharedBufDescriptor.capacity == 0 ||
+           SharedBufDescriptor.capacity == size);
+       SharedBufDescriptor.capacity = size;
+       SharedBufHash = CHashInitialize(SharedBufHash, &SharedBufDescriptor);
+   }
 }
 
 /*
  * BufTableLookup
  *     Lookup the given BufferTag; return buffer ID, or -1 if not found
- *
- * Caller must hold at least share lock on BufMappingLock for tag's partition
  */
 int
-BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
+BufTableLookup(BufferTag *tagPtr)
 {
-   BufferLookupEnt *result;
-
-   result = (BufferLookupEnt *)
-       hash_search_with_hash_value(SharedBufHash,
-                                   (void *) tagPtr,
-                                   hashcode,
-                                   HASH_FIND,
-                                   NULL);
+   BufferLookupEnt ent;
 
-   if (!result)
+   ent.key = *tagPtr;
+   if (!CHashSearch(SharedBufHash, &ent))
        return -1;
 
-   return result->id;
+   return ent.id;
 }
 
 /*
@@ -118,27 +101,20 @@ BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * Callers no longer hold a BufMappingLock partition lock around this call
  */
 int
-BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
+BufTableInsert(BufferTag *tagPtr, int buf_id)
 {
-   BufferLookupEnt *result;
-   bool        found;
+   BufferLookupEnt ent;
+
+   ent.key = *tagPtr;
+   ent.id = buf_id;
 
    Assert(buf_id >= 0);        /* -1 is reserved for not-in-table */
    Assert(tagPtr->blockNum != P_NEW);  /* invalid tag */
 
-   result = (BufferLookupEnt *)
-       hash_search_with_hash_value(SharedBufHash,
-                                   (void *) tagPtr,
-                                   hashcode,
-                                   HASH_ENTER,
-                                   &found);
-
-   if (found)                  /* found something already in the table */
-       return result->id;
-
-   result->id = buf_id;
+   if (CHashInsert(SharedBufHash, &ent))
+       return -1;
 
-   return -1;
+   return ent.id;
 }
 
 /*
@@ -148,17 +124,8 @@ BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * Callers no longer hold a BufMappingLock partition lock around this call
  */
 void
-BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
+BufTableDelete(BufferTag *tagPtr)
 {
-   BufferLookupEnt *result;
-
-   result = (BufferLookupEnt *)
-       hash_search_with_hash_value(SharedBufHash,
-                                   (void *) tagPtr,
-                                   hashcode,
-                                   HASH_REMOVE,
-                                   NULL);
-
-   if (!result)                /* shouldn't happen */
+   if (!CHashDelete(SharedBufHash, tagPtr))
        elog(ERROR, "shared buffer hash table corrupted");
 }
index 45d1d61d95dc303a9621834289962aca80be7fc0..663fab7b45ab3efc44878aa43cbcbaf39c7c7763 100644 (file)
@@ -429,22 +429,14 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
    else
    {
        BufferTag   newTag;     /* identity of requested block */
-       uint32      newHash;    /* hash value for newTag */
-       LWLock     *newPartitionLock;   /* buffer partition lock for it */
        int         buf_id;
 
        /* create a tag so we can lookup the buffer */
        INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
                       forkNum, blockNum);
 
-       /* determine its hash code and partition lock ID */
-       newHash = BufTableHashCode(&newTag);
-       newPartitionLock = BufMappingPartitionLock(newHash);
-
        /* see if the block is in the buffer pool already */
-       LWLockAcquire(newPartitionLock, LW_SHARED);
-       buf_id = BufTableLookup(&newTag, newHash);
+       buf_id = BufTableLookup(&newTag);
-       LWLockRelease(newPartitionLock);
 
        /* If not in buffers, initiate prefetch */
        if (buf_id < 0)
@@ -822,11 +814,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
            bool *foundPtr)
 {
    BufferTag   newTag;         /* identity of requested block */
-   uint32      newHash;        /* hash value for newTag */
-   LWLock     *newPartitionLock;       /* buffer partition lock for it */
    BufferTag   oldTag;         /* previous identity of selected buffer */
-   uint32      oldHash;        /* hash value for oldTag */
-   LWLock     *oldPartitionLock;       /* buffer partition lock for it */
    BufFlags    oldFlags;
    int         buf_id;
    volatile BufferDesc *buf;
@@ -835,29 +823,31 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
    /* create a tag so we can lookup the buffer */
    INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
 
-   /* determine its hash code and partition lock ID */
-   newHash = BufTableHashCode(&newTag);
-   newPartitionLock = BufMappingPartitionLock(newHash);
-
    /* see if the block is in the buffer pool already */
-   LWLockAcquire(newPartitionLock, LW_SHARED);
-   buf_id = BufTableLookup(&newTag, newHash);
+start:
+   buf_id = BufTableLookup(&newTag);
    if (buf_id >= 0)
    {
+       BufferDesc *foundbuf;
+
        /*
         * Found it.  Now, pin the buffer so no one can steal it from the
-        * buffer pool, and check to see if the correct data has been loaded
-        * into the buffer.
+        * buffer pool.
         */
-       buf = &BufferDescriptors[buf_id];
+       foundbuf = &BufferDescriptors[buf_id];
 
-       valid = PinBuffer(buf, strategy);
+       valid = PinBuffer(foundbuf, strategy);
 
-       /* Can release the mapping lock as soon as we've pinned it */
-       LWLockRelease(newPartitionLock);
+       /* Check whether someone recycled the buffer before we pinned it. */
+       if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+       {
+           UnpinBuffer(foundbuf, true);
+           goto start;
+       }
 
        *foundPtr = TRUE;
 
+       /* Check to see if the correct data has been loaded into the buffer. */
        if (!valid)
        {
            /*
@@ -867,7 +857,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
             * own read attempt if the page is still not BM_VALID.
             * StartBufferIO does it all.
             */
-           if (StartBufferIO(buf, true))
+           if (StartBufferIO(foundbuf, true))
            {
                /*
                 * If we get here, previous attempts to read the buffer must
@@ -877,15 +867,9 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
            }
        }
 
-       return buf;
+       return foundbuf;
    }
 
-   /*
-    * Didn't find it in the buffer pool.  We'll have to initialize a new
-    * buffer.  Remember to unlock the mapping lock while doing the work.
-    */
-   LWLockRelease(newPartitionLock);
-
    /* Loop here in case we have to try another victim buffer */
    for (;;)
    {
@@ -986,42 +970,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
         */
        if (oldFlags & BM_TAG_VALID)
        {
-           /*
-            * Need to compute the old tag's hashcode and partition lock ID.
-            * XXX is it worth storing the hashcode in BufferDesc so we need
-            * not recompute it here?  Probably not.
-            */
+           /* Save old tag. */
            oldTag = buf->tag;
-           oldHash = BufTableHashCode(&oldTag);
-           oldPartitionLock = BufMappingPartitionLock(oldHash);
-
-           /*
-            * Must lock the lower-numbered partition first to avoid
-            * deadlocks.
-            */
-           if (oldPartitionLock < newPartitionLock)
-           {
-               LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-           }
-           else if (oldPartitionLock > newPartitionLock)
-           {
-               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-               LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-           }
-           else
-           {
-               /* only one partition, only one lock */
-               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-           }
-       }
-       else
-       {
-           /* if it wasn't valid, we need only the new partition */
-           LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-           /* these just keep the compiler quiet about uninit variables */
-           oldHash = 0;
-           oldPartitionLock = 0;
        }
 
        /*
@@ -1031,32 +981,34 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
         * Note that we have not yet removed the hashtable entry for the old
         * tag.
         */
-       buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+enter:
+       buf_id = BufTableInsert(&newTag, buf->buf_id);
 
        if (buf_id >= 0)
        {
+           BufferDesc *foundbuf;
+
            /*
-            * Got a collision. Someone has already done what we were about to
-            * do. We'll just handle this as if it were found in the buffer
-            * pool in the first place.  First, give up the buffer we were
-            * planning to use.
+            * We've got a collision, apparently: it looks like someone else
+            * did what we were about to do.  We can handle this as if we had
+            * found the buffer in the pool in the first place, but we must
+            * recheck the buffer tag after pinning it, because it could still
+            * get renamed under us.
+            */
+           foundbuf = &BufferDescriptors[buf_id];
+           valid = PinBuffer(foundbuf, strategy);
+           if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+           {
+               UnpinBuffer(foundbuf, true);
+               goto enter;
+           }
+
+           /*
+            * Collision confirmed.  Give up the buffer we were planning to
+            * use.
             */
            UnpinBuffer(buf, true);
 
-           /* Can give up that buffer's mapping partition lock now */
-           if ((oldFlags & BM_TAG_VALID) &&
-               oldPartitionLock != newPartitionLock)
-               LWLockRelease(oldPartitionLock);
-
-           /* remaining code should match code at top of routine */
-
-           buf = &BufferDescriptors[buf_id];
-
-           valid = PinBuffer(buf, strategy);
-
-           /* Can release the mapping lock as soon as we've pinned it */
-           LWLockRelease(newPartitionLock);
-
            *foundPtr = TRUE;
 
            if (!valid)
@@ -1068,7 +1020,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                 * then set up our own read attempt if the page is still not
                 * BM_VALID.  StartBufferIO does it all.
                 */
-               if (StartBufferIO(buf, true))
+               if (StartBufferIO(foundbuf, true))
                {
                    /*
                     * If we get here, previous attempts to read the buffer
@@ -1078,7 +1030,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                }
            }
 
-           return buf;
+           return foundbuf;
        }
 
        /*
@@ -1097,11 +1049,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
            break;
 
        UnlockBufHdr(buf);
-       BufTableDelete(&newTag, newHash);
-       if ((oldFlags & BM_TAG_VALID) &&
-           oldPartitionLock != newPartitionLock)
-           LWLockRelease(oldPartitionLock);
-       LWLockRelease(newPartitionLock);
+       BufTableDelete(&newTag);
        UnpinBuffer(buf, true);
    }
 
@@ -1124,13 +1072,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
    UnlockBufHdr(buf);
 
    if (oldFlags & BM_TAG_VALID)
-   {
-       BufTableDelete(&oldTag, oldHash);
-       if (oldPartitionLock != newPartitionLock)
-           LWLockRelease(oldPartitionLock);
-   }
-
-   LWLockRelease(newPartitionLock);
+       BufTableDelete(&oldTag);
 
    /*
     * Buffer contents are currently invalid.  Try to get the io_in_progress
@@ -1166,42 +1108,11 @@ static void
 InvalidateBuffer(volatile BufferDesc *buf)
 {
    BufferTag   oldTag;
-   uint32      oldHash;        /* hash value for oldTag */
-   LWLock     *oldPartitionLock;       /* buffer partition lock for it */
    BufFlags    oldFlags;
 
    /* Save the original buffer tag before dropping the spinlock */
    oldTag = buf->tag;
 
-   UnlockBufHdr(buf);
-
-   /*
-    * Need to compute the old tag's hashcode and partition lock ID. XXX is it
-    * worth storing the hashcode in BufferDesc so we need not recompute it
-    * here?  Probably not.
-    */
-   oldHash = BufTableHashCode(&oldTag);
-   oldPartitionLock = BufMappingPartitionLock(oldHash);
-
-retry:
-
-   /*
-    * Acquire exclusive mapping lock in preparation for changing the buffer's
-    * association.
-    */
-   LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-
-   /* Re-lock the buffer header */
-   LockBufHdr(buf);
-
-   /* If it's changed while we were waiting for lock, do nothing */
-   if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
-   {
-       UnlockBufHdr(buf);
-       LWLockRelease(oldPartitionLock);
-       return;
-   }
-
    /*
     * We assume the only reason for it to be pinned is that someone else is
     * flushing the page out.  Wait for them to finish.  (This could be an
@@ -1211,15 +1122,21 @@ retry:
     * yet done StartBufferIO, WaitIO will fall through and we'll effectively
     * be busy-looping here.)
     */
-   if (buf->refcount != 0)
+   while (buf->refcount != 0)
    {
        UnlockBufHdr(buf);
-       LWLockRelease(oldPartitionLock);
        /* safety check: should definitely not be our *own* pin */
        if (GetPrivateRefCount(buf->buf_id) > 0)
            elog(ERROR, "buffer is pinned in InvalidateBuffer");
        WaitIO(buf);
-       goto retry;
+       LockBufHdr(buf);
+
+       /* If it's changed while we were waiting for lock, do nothing */
+       if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
+       {
+           UnlockBufHdr(buf);
+           return;
+       }
    }
 
    /*
@@ -1237,12 +1154,7 @@ retry:
     * Remove the buffer from the lookup hashtable, if it was in there.
     */
    if (oldFlags & BM_TAG_VALID)
-       BufTableDelete(&oldTag, oldHash);
-
-   /*
-    * Done with mapping lock.
-    */
-   LWLockRelease(oldPartitionLock);
+       BufTableDelete(&oldTag);
 
    /*
     * Insert the buffer at the head of the list of free buffers.
index 0e69b633c3fad45742bca8ee7507baf87efe3823..4c6fac8052a9fbc9ed3661a1c7e195305ab7455e 100644 (file)
@@ -95,20 +95,6 @@ typedef struct buftag
    (a).forkNum == (b).forkNum \
 )
 
-/*
- * The shared buffer mapping table is partitioned to reduce contention.
- * To determine which partition lock a given tag requires, compute the tag's
- * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
- * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
- */
-#define BufTableHashPartition(hashcode) \
-   ((hashcode) % NUM_BUFFER_PARTITIONS)
-#define BufMappingPartitionLock(hashcode) \
-   (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
-       BufTableHashPartition(hashcode)].lock)
-#define BufMappingPartitionLockByIndex(i) \
-   (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
-
 /*
  * BufferDesc -- shared descriptor/state data for a single shared buffer.
  *
@@ -200,9 +186,8 @@ extern void StrategyInitialize(bool init);
 extern Size BufTableShmemSize(int size);
 extern void InitBufTable(int size);
-extern uint32 BufTableHashCode(BufferTag *tagPtr);
-extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
-extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
-extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
+extern int BufTableLookup(BufferTag *tagPtr);
+extern int BufTableInsert(BufferTag *tagPtr, int buf_id);
+extern void BufTableDelete(BufferTag *tagPtr);
 
 /* localbuf.c */
 extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
index 02c8f1aa8c610470f9607d2df06cbbc7341811e5..f98be4d4827a97020c9a8b4a965bed5d458baeac 100644 (file)
@@ -136,7 +136,7 @@ extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
  */
 
 /* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS  128
+#define NUM_BUFFER_PARTITIONS  0
 
 /* Number of partitions the shared lock tables are divided into */
 #define LOG2_NUM_LOCK_PARTITIONS  4