Use chash for buftable stuff.
author Robert Haas <rhaas@postgresql.org>
Mon, 13 Oct 2014 20:26:45 +0000 (16:26 -0400)
committer Robert Haas <rhaas@postgresql.org>
Tue, 27 Jan 2015 02:39:01 +0000 (02:39 +0000)
src/backend/storage/buffer/buf_table.c
src/backend/storage/buffer/bufmgr.c
src/include/storage/buf_internals.h
src/include/storage/lwlock.h

index 6ed47d5142de68ec71aa2bd031b453159d02cc8b..0840afaa5da25de72e7f755bb289c415ca579c17 100644 (file)
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/buf_internals.h"
+#include "utils/chash.h"
 
 
 /* entry for buffer lookup hashtable */
@@ -32,8 +34,13 @@ typedef struct
        int                     id;                             /* Associated buffer ID */
 } BufferLookupEnt;
 
-static HTAB *SharedBufHash;
-
+static CHashDescriptor SharedBufDescriptor = {
+       "buffer lookup table",
+       0,
+       sizeof(BufferLookupEnt),
+       sizeof(BufferTag)
+};
+static CHashTable SharedBufHash;
 
 /*
  * Estimate space needed for mapping hashtable
@@ -42,7 +49,13 @@ static HTAB *SharedBufHash;
 Size
 BufTableShmemSize(int size)
 {
-       return hash_estimate_size(size, sizeof(BufferLookupEnt));
+       if (SharedBufHash == NULL)
+       {
+               SharedBufDescriptor.capacity = size;
+               SharedBufHash = CHashBootstrap(&SharedBufDescriptor);
+       }
+
+       return CHashEstimateSize(SharedBufHash);
 }
 
 /*
@@ -52,58 +65,29 @@ BufTableShmemSize(int size)
 void
 InitBufTable(int size)
 {
-       HASHCTL         info;
-
-       /* assume no locking is needed yet */
-
-       /* BufferTag maps to Buffer */
-       info.keysize = sizeof(BufferTag);
-       info.entrysize = sizeof(BufferLookupEnt);
-       info.num_partitions = NUM_BUFFER_PARTITIONS;
-
-       SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
-                                                                 size, size,
-                                                                 &info,
-                                                                 HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
-}
-
-/*
- * BufTableHashCode
- *             Compute the hash code associated with a BufferTag
- *
- * This must be passed to the lookup/insert/delete routines along with the
- * tag.  We do it like this because the callers need to know the hash code
- * in order to determine which buffer partition to lock, and we don't want
- * to do the hash computation twice (hash_any is a bit slow).
- */
-uint32
-BufTableHashCode(BufferTag *tagPtr)
-{
-       return get_hash_value(SharedBufHash, (void *) tagPtr);
+       if (SharedBufHash == NULL || !IsUnderPostmaster)
+       {
+               Assert(SharedBufDescriptor.capacity == 0 ||
+                       SharedBufDescriptor.capacity == size);
+               SharedBufDescriptor.capacity = size;
+               SharedBufHash = CHashInitialize(SharedBufHash, &SharedBufDescriptor);
+       }
 }
 
 /*
  * BufTableLookup
  *             Lookup the given BufferTag; return buffer ID, or -1 if not found
- *
- * Caller must hold at least share lock on BufMappingLock for tag's partition
  */
 int
-BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
+BufTableLookup(BufferTag *tagPtr)
 {
-       BufferLookupEnt *result;
-
-       result = (BufferLookupEnt *)
-               hash_search_with_hash_value(SharedBufHash,
-                                                                       (void *) tagPtr,
-                                                                       hashcode,
-                                                                       HASH_FIND,
-                                                                       NULL);
+       BufferLookupEnt ent;
 
-       if (!result)
+       ent.key = *tagPtr;
+       if (!CHashSearch(SharedBufHash, &ent))
                return -1;
 
-       return result->id;
+       return ent.id;
 }
 
 /*
@@ -117,27 +101,20 @@ BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * No BufMappingLock is required; the mapping partition locks are gone
  */
 int
-BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
+BufTableInsert(BufferTag *tagPtr, int buf_id)
 {
-       BufferLookupEnt *result;
-       bool            found;
+       BufferLookupEnt ent;
+
+       ent.key = *tagPtr;
+       ent.id = buf_id;
 
        Assert(buf_id >= 0);            /* -1 is reserved for not-in-table */
        Assert(tagPtr->blockNum != P_NEW);      /* invalid tag */
 
-       result = (BufferLookupEnt *)
-               hash_search_with_hash_value(SharedBufHash,
-                                                                       (void *) tagPtr,
-                                                                       hashcode,
-                                                                       HASH_ENTER,
-                                                                       &found);
-
-       if (found)                                      /* found something already in the table */
-               return result->id;
-
-       result->id = buf_id;
+       if (CHashInsert(SharedBufHash, &ent))
+               return -1;
 
-       return -1;
+       return ent.id;
 }
 
 /*
@@ -147,17 +124,8 @@ BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * No BufMappingLock is required; the mapping partition locks are gone
  */
 void
-BufTableDelete(BufferTag *tagPtr, uint32 hashcode)
+BufTableDelete(BufferTag *tagPtr)
 {
-       BufferLookupEnt *result;
-
-       result = (BufferLookupEnt *)
-               hash_search_with_hash_value(SharedBufHash,
-                                                                       (void *) tagPtr,
-                                                                       hashcode,
-                                                                       HASH_REMOVE,
-                                                                       NULL);
-
-       if (!result)                            /* shouldn't happen */
+       if (!CHashDelete(SharedBufHash, tagPtr))
                elog(ERROR, "shared buffer hash table corrupted");
 }
index 7eb2d22fa16169cc6f4e4c93fbb9223cab1f4553..f98b6f255b5f572c8b7b915150f2d6a64c954dff 100644 (file)
@@ -445,22 +445,14 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
        else
        {
                BufferTag       newTag;         /* identity of requested block */
-               uint32          newHash;        /* hash value for newTag */
-               LWLock     *newPartitionLock;   /* buffer partition lock for it */
                int                     buf_id;
 
                /* create a tag so we can lookup the buffer */
                INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
                                           forkNum, blockNum);
 
-               /* determine its hash code and partition lock ID */
-               newHash = BufTableHashCode(&newTag);
-               newPartitionLock = BufMappingPartitionLock(newHash);
-
                /* see if the block is in the buffer pool already */
-               LWLockAcquire(newPartitionLock, LW_SHARED);
                buf_id = BufTableLookup(&newTag, newHash);
-               LWLockRelease(newPartitionLock);
 
                /* If not in buffers, initiate prefetch */
                if (buf_id < 0)
@@ -871,11 +863,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                        bool *foundPtr)
 {
        BufferTag       newTag;                 /* identity of requested block */
-       uint32          newHash;                /* hash value for newTag */
-       LWLock     *newPartitionLock;           /* buffer partition lock for it */
        BufferTag       oldTag;                 /* previous identity of selected buffer */
-       uint32          oldHash;                /* hash value for oldTag */
-       LWLock     *oldPartitionLock;           /* buffer partition lock for it */
        BufFlags        oldFlags;
        int                     buf_id;
        volatile BufferDesc *buf;
@@ -884,29 +872,31 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
        /* create a tag so we can lookup the buffer */
        INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
 
-       /* determine its hash code and partition lock ID */
-       newHash = BufTableHashCode(&newTag);
-       newPartitionLock = BufMappingPartitionLock(newHash);
-
        /* see if the block is in the buffer pool already */
-       LWLockAcquire(newPartitionLock, LW_SHARED);
-       buf_id = BufTableLookup(&newTag, newHash);
+start:
+       buf_id = BufTableLookup(&newTag);
        if (buf_id >= 0)
        {
+               BufferDesc *foundbuf;
+
                /*
                 * Found it.  Now, pin the buffer so no one can steal it from the
-                * buffer pool, and check to see if the correct data has been loaded
-                * into the buffer.
+                * buffer pool.
                 */
-               buf = &BufferDescriptors[buf_id];
+               foundbuf = &BufferDescriptors[buf_id];
 
-               valid = PinBuffer(buf, strategy);
+               valid = PinBuffer(foundbuf, strategy);
 
-               /* Can release the mapping lock as soon as we've pinned it */
-               LWLockRelease(newPartitionLock);
+               /* Check whether someone recycled the buffer before we pinned it. */
+               if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+               {
+                       UnpinBuffer(foundbuf, true);
+                       goto start;
+               }
 
                *foundPtr = TRUE;
 
+               /* Check to see if the correct data has been loaded into the buffer. */
                if (!valid)
                {
                        /*
@@ -916,7 +906,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                         * own read attempt if the page is still not BM_VALID.
                         * StartBufferIO does it all.
                         */
-                       if (StartBufferIO(buf, true))
+                       if (StartBufferIO(foundbuf, true))
                        {
                                /*
                                 * If we get here, previous attempts to read the buffer must
@@ -926,15 +916,9 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                        }
                }
 
-               return buf;
+               return foundbuf;
        }
 
-       /*
-        * Didn't find it in the buffer pool.  We'll have to initialize a new
-        * buffer.  Remember to unlock the mapping lock while doing the work.
-        */
-       LWLockRelease(newPartitionLock);
-
        /* Loop here in case we have to try another victim buffer */
        for (;;)
        {
@@ -1041,42 +1025,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                 */
                if (oldFlags & BM_TAG_VALID)
                {
-                       /*
-                        * Need to compute the old tag's hashcode and partition lock ID.
-                        * XXX is it worth storing the hashcode in BufferDesc so we need
-                        * not recompute it here?  Probably not.
-                        */
+                       /* Save old tag. */
                        oldTag = buf->tag;
-                       oldHash = BufTableHashCode(&oldTag);
-                       oldPartitionLock = BufMappingPartitionLock(oldHash);
-
-                       /*
-                        * Must lock the lower-numbered partition first to avoid
-                        * deadlocks.
-                        */
-                       if (oldPartitionLock < newPartitionLock)
-                       {
-                               LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-                               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-                       }
-                       else if (oldPartitionLock > newPartitionLock)
-                       {
-                               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-                               LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-                       }
-                       else
-                       {
-                               /* only one partition, only one lock */
-                               LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-                       }
-               }
-               else
-               {
-                       /* if it wasn't valid, we need only the new partition */
-                       LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
-                       /* these just keep the compiler quiet about uninit variables */
-                       oldHash = 0;
-                       oldPartitionLock = 0;
                }
 
                /*
@@ -1086,32 +1036,34 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                 * Note that we have not yet removed the hashtable entry for the old
                 * tag.
                 */
-               buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+enter:
+               buf_id = BufTableInsert(&newTag, buf->buf_id);
 
                if (buf_id >= 0)
                {
+                       BufferDesc *foundbuf;
+
                        /*
-                        * Got a collision. Someone has already done what we were about to
-                        * do. We'll just handle this as if it were found in the buffer
-                        * pool in the first place.  First, give up the buffer we were
-                        * planning to use.
+                        * We've got a collision, apparently: it looks like someone else
+                        * did what we were about to do.  We can handle this as if we had
+                        * found the buffer in the pool in the first place, but we must
+                        * recheck the buffer tag after pinning it, because it could still
+                        * get renamed under us.
+                        */
+                       foundbuf = &BufferDescriptors[buf_id];
+                       valid = PinBuffer(foundbuf, strategy);
+                       if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+                       {
+                               UnpinBuffer(foundbuf, true);
+                               goto enter;
+                       }
+
+                       /*
+                        * Collision confirmed.  Give up the buffer we were planning to
+                        * use.
                         */
                        UnpinBuffer(buf, true);
 
-                       /* Can give up that buffer's mapping partition lock now */
-                       if ((oldFlags & BM_TAG_VALID) &&
-                               oldPartitionLock != newPartitionLock)
-                               LWLockRelease(oldPartitionLock);
-
-                       /* remaining code should match code at top of routine */
-
-                       buf = &BufferDescriptors[buf_id];
-
-                       valid = PinBuffer(buf, strategy);
-
-                       /* Can release the mapping lock as soon as we've pinned it */
-                       LWLockRelease(newPartitionLock);
-
                        *foundPtr = TRUE;
 
                        if (!valid)
@@ -1123,7 +1075,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                                 * then set up our own read attempt if the page is still not
                                 * BM_VALID.  StartBufferIO does it all.
                                 */
-                               if (StartBufferIO(buf, true))
+                               if (StartBufferIO(foundbuf, true))
                                {
                                        /*
                                         * If we get here, previous attempts to read the buffer
@@ -1133,7 +1085,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                                }
                        }
 
-                       return buf;
+                       return foundbuf;
                }
 
                /*
@@ -1152,11 +1104,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                        break;
 
                UnlockBufHdr(buf);
-               BufTableDelete(&newTag, newHash);
-               if ((oldFlags & BM_TAG_VALID) &&
-                       oldPartitionLock != newPartitionLock)
-                       LWLockRelease(oldPartitionLock);
-               LWLockRelease(newPartitionLock);
+               BufTableDelete(&newTag);
                UnpinBuffer(buf, true);
        }
 
@@ -1179,13 +1127,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
        UnlockBufHdr(buf);
 
        if (oldFlags & BM_TAG_VALID)
-       {
-               BufTableDelete(&oldTag, oldHash);
-               if (oldPartitionLock != newPartitionLock)
-                       LWLockRelease(oldPartitionLock);
-       }
-
-       LWLockRelease(newPartitionLock);
+               BufTableDelete(&oldTag);
 
        /*
         * Buffer contents are currently invalid.  Try to get the io_in_progress
@@ -1221,42 +1163,11 @@ static void
 InvalidateBuffer(volatile BufferDesc *buf)
 {
        BufferTag       oldTag;
-       uint32          oldHash;                /* hash value for oldTag */
-       LWLock     *oldPartitionLock;           /* buffer partition lock for it */
        BufFlags        oldFlags;
 
        /* Save the original buffer tag before dropping the spinlock */
        oldTag = buf->tag;
 
-       UnlockBufHdr(buf);
-
-       /*
-        * Need to compute the old tag's hashcode and partition lock ID. XXX is it
-        * worth storing the hashcode in BufferDesc so we need not recompute it
-        * here?  Probably not.
-        */
-       oldHash = BufTableHashCode(&oldTag);
-       oldPartitionLock = BufMappingPartitionLock(oldHash);
-
-retry:
-
-       /*
-        * Acquire exclusive mapping lock in preparation for changing the buffer's
-        * association.
-        */
-       LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-
-       /* Re-lock the buffer header */
-       LockBufHdr(buf);
-
-       /* If it's changed while we were waiting for lock, do nothing */
-       if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
-       {
-               UnlockBufHdr(buf);
-               LWLockRelease(oldPartitionLock);
-               return;
-       }
-
        /*
         * We assume the only reason for it to be pinned is that someone else is
         * flushing the page out.  Wait for them to finish.  (This could be an
@@ -1266,15 +1177,21 @@ retry:
         * yet done StartBufferIO, WaitIO will fall through and we'll effectively
         * be busy-looping here.)
         */
-       if (buf->refcount != 0)
+       while (buf->refcount != 0)
        {
                UnlockBufHdr(buf);
-               LWLockRelease(oldPartitionLock);
                /* safety check: should definitely not be our *own* pin */
                if (GetPrivateRefCount(buf->buf_id) > 0)
                        elog(ERROR, "buffer is pinned in InvalidateBuffer");
                WaitIO(buf);
-               goto retry;
+               LockBufHdr(buf);
+
+               /* If it's changed while we were waiting for lock, do nothing */
+               if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
+               {
+                       UnlockBufHdr(buf);
+                       return;
+               }
        }
 
        /*
@@ -1292,12 +1209,7 @@ retry:
         * Remove the buffer from the lookup hashtable, if it was in there.
         */
        if (oldFlags & BM_TAG_VALID)
-               BufTableDelete(&oldTag, oldHash);
-
-       /*
-        * Done with mapping lock.
-        */
-       LWLockRelease(oldPartitionLock);
+               BufTableDelete(&oldTag);
 
        /*
         * Insert the buffer at the head of the list of free buffers.
index 9b8ace54da8af82bce6f9f1b77c7db302a1f1458..4b1696cf778444f26316e844beb1f75a24bc91ad 100644 (file)
@@ -95,20 +95,6 @@ typedef struct buftag
        (a).forkNum == (b).forkNum \
 )
 
-/*
- * The shared buffer mapping table is partitioned to reduce contention.
- * To determine which partition lock a given tag requires, compute the tag's
- * hash code with BufTableHashCode(), then apply BufMappingPartitionLock().
- * NB: NUM_BUFFER_PARTITIONS must be a power of 2!
- */
-#define BufTableHashPartition(hashcode) \
-       ((hashcode) % NUM_BUFFER_PARTITIONS)
-#define BufMappingPartitionLock(hashcode) \
-       (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
-               BufTableHashPartition(hashcode)].lock)
-#define BufMappingPartitionLockByIndex(i) \
-       (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
-
 /*
  *     BufferDesc -- shared descriptor/state data for a single shared buffer.
  *
@@ -200,9 +186,8 @@ extern void StrategyInitialize(bool init);
 extern Size BufTableShmemSize(int size);
 extern void InitBufTable(int size);
-extern uint32 BufTableHashCode(BufferTag *tagPtr);
-extern int     BufTableLookup(BufferTag *tagPtr, uint32 hashcode);
-extern int     BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
-extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
+extern int     BufTableLookup(BufferTag *tagPtr);
+extern int     BufTableInsert(BufferTag *tagPtr, int buf_id);
+extern void BufTableDelete(BufferTag *tagPtr);
 
 /* localbuf.c */
 extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
index e3c2efc1f3de61bd736d764f302876730113e5bf..1b37447b36c227e1ee2f692b8450cadf22df4740 100644 (file)
@@ -144,7 +144,7 @@ extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
  */
 
 /* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS  128
+#define NUM_BUFFER_PARTITIONS  0
 
 /* Number of partitions the shared lock tables are divided into */
 #define LOG2_NUM_LOCK_PARTITIONS  4