*/
#include "postgres.h"
+#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/buf_internals.h"
+#include "utils/chash.h"
/* entry for buffer lookup hashtable */
int id; /* Associated buffer ID */
} BufferLookupEnt;
-static HTAB *SharedBufHash;
-
+static CHashDescriptor SharedBufDescriptor = {
+ "buffer lookup table",
+ 0, /* presumably the capacity field, assigned later from the size argument -- confirm field order in chash.h */
+ sizeof(BufferLookupEnt),
+ sizeof(BufferTag)
+};
+static CHashTable SharedBufHash; /* compared against NULL below to detect "not set up yet" */
/*
* Estimate space needed for mapping hashtable
Size
BufTableShmemSize(int size)
{
- return hash_estimate_size(size, sizeof(BufferLookupEnt));
+ if (SharedBufHash == NULL) /* no table handle yet: bootstrap one from the descriptor */
+ {
+ SharedBufDescriptor.capacity = size; /* record the requested size before bootstrapping */
+ SharedBufHash = CHashBootstrap(&SharedBufDescriptor);
+ }
+
+ return CHashEstimateSize(SharedBufHash); /* estimate comes from the table handle, not from size directly */
}
/*
void
InitBufTable(int size)
{
- HASHCTL info;
-
- /* assume no locking is needed yet */
-
- /* BufferTag maps to Buffer */
- info.keysize = sizeof(BufferTag);
- info.entrysize = sizeof(BufferLookupEnt);
- info.num_partitions = NUM_BUFFER_PARTITIONS;
-
- SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
- size, size,
- &info,
- HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
-}
-
-/*
- * BufTableHashCode
- * Compute the hash code associated with a BufferTag
- *
- * This must be passed to the lookup/insert/delete routines along with the
- * tag. We do it like this because the callers need to know the hash code
- * in order to determine which buffer partition to lock, and we don't want
- * to do the hash computation twice (hash_any is a bit slow).
- */
-uint32
-BufTableHashCode(BufferTag *tagPtr)
-{
- return get_hash_value(SharedBufHash, (void *) tagPtr);
+ if (SharedBufHash == NULL || !IsUnderPostmaster) /* initialize when there is no handle yet, or whenever not under the postmaster */
+ {
+ Assert(SharedBufDescriptor.capacity == 0 ||
+ SharedBufDescriptor.capacity == size); /* a capacity recorded earlier must agree with size */
+ SharedBufDescriptor.capacity = size;
+ SharedBufHash = CHashInitialize(SharedBufHash, &SharedBufDescriptor); /* the current handle (possibly NULL) is passed back in */
+ }
}
/*
* BufTableLookup
* Lookup the given BufferTag; return buffer ID, or -1 if not found
- *
- * Caller must hold at least share lock on BufMappingLock for tag's partition
*/
int
-BufTableLookup(BufferTag *tagPtr, uint32 hashcode)
+BufTableLookup(BufferTag *tagPtr)
{
- BufferLookupEnt *result;
-
- result = (BufferLookupEnt *)
- hash_search_with_hash_value(SharedBufHash,
- (void *) tagPtr,
- hashcode,
- HASH_FIND,
- NULL);
+ BufferLookupEnt ent; /* local entry: key is filled in here, id is read back after the search */
- if (!result)
+ ent.key = *tagPtr; /* copy the caller's tag into the search entry */
+ if (!CHashSearch(SharedBufHash, &ent)) /* search reported no match */
return -1;
- return result->id;
+ return ent.id; /* presumably written by CHashSearch on a hit -- confirm against chash.h */
}
/*
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * No BufMappingLock partition lock is required of the caller.
*/
int
-BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id)
+BufTableInsert(BufferTag *tagPtr, int buf_id)
{
- BufferLookupEnt *result;
- bool found;
+ BufferLookupEnt ent; /* local entry holding the tag/buffer ID pair to insert */
+
+ ent.key = *tagPtr;
+ ent.id = buf_id;
Assert(buf_id >= 0); /* -1 is reserved for not-in-table */
Assert(tagPtr->blockNum != P_NEW); /* invalid tag */
- result = (BufferLookupEnt *)
- hash_search_with_hash_value(SharedBufHash,
- (void *) tagPtr,
- hashcode,
- HASH_ENTER,
- &found);
-
- if (found) /* found something already in the table */
- return result->id;
-
- result->id = buf_id;
+ if (CHashInsert(SharedBufHash, &ent)) /* insert succeeded: -1 tells the caller there was no conflict */
+ return -1;
- return -1;
+ return ent.id; /* insert refused; presumably ent now holds the conflicting entry's id -- confirm against chash.h */
}
/*
- * Caller must hold exclusive lock on BufMappingLock for tag's partition
+ * No BufMappingLock partition lock is required of the caller.
*/
void
-BufTableDelete(BufferTag *tagPtr)
+BufTableDelete(BufferTag *tagPtr)
{
- BufferLookupEnt *result;
-
- result = (BufferLookupEnt *)
- hash_search_with_hash_value(SharedBufHash,
- (void *) tagPtr,
- hashcode,
- HASH_REMOVE,
- NULL);
-
- if (!result) /* shouldn't happen */
+ if (!CHashDelete(SharedBufHash, tagPtr)) /* shouldn't happen: the entry is expected to exist */
elog(ERROR, "shared buffer hash table corrupted");
}
else
{
BufferTag newTag; /* identity of requested block */
- uint32 newHash; /* hash value for newTag */
- LWLock *newPartitionLock; /* buffer partition lock for it */
int buf_id;
/* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
forkNum, blockNum);
- /* determine its hash code and partition lock ID */
- newHash = BufTableHashCode(&newTag);
- newPartitionLock = BufMappingPartitionLock(newHash);
-
/* see if the block is in the buffer pool already */
- LWLockAcquire(newPartitionLock, LW_SHARED);
- buf_id = BufTableLookup(&newTag, newHash);
- LWLockRelease(newPartitionLock);
+ /* the lookup takes only the tag; no mapping partition lock is held */
+ buf_id = BufTableLookup(&newTag);
/* If not in buffers, initiate prefetch */
if (buf_id < 0)
bool *foundPtr)
{
BufferTag newTag; /* identity of requested block */
- uint32 newHash; /* hash value for newTag */
- LWLock *newPartitionLock; /* buffer partition lock for it */
BufferTag oldTag; /* previous identity of selected buffer */
- uint32 oldHash; /* hash value for oldTag */
- LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
int buf_id;
volatile BufferDesc *buf;
/* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum);
- /* determine its hash code and partition lock ID */
- newHash = BufTableHashCode(&newTag);
- newPartitionLock = BufMappingPartitionLock(newHash);
-
/* see if the block is in the buffer pool already */
- LWLockAcquire(newPartitionLock, LW_SHARED);
- buf_id = BufTableLookup(&newTag, newHash);
+start:
+ buf_id = BufTableLookup(&newTag);
if (buf_id >= 0)
{
+ BufferDesc *foundbuf;
+
/*
* Found it. Now, pin the buffer so no one can steal it from the
- * buffer pool, and check to see if the correct data has been loaded
- * into the buffer.
+ * buffer pool.
*/
- buf = &BufferDescriptors[buf_id];
+ foundbuf = &BufferDescriptors[buf_id];
- valid = PinBuffer(buf, strategy);
+ valid = PinBuffer(foundbuf, strategy);
- /* Can release the mapping lock as soon as we've pinned it */
- LWLockRelease(newPartitionLock);
+ /* Check whether someone recycled the buffer before we pinned it. */
+ if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+ {
+ UnpinBuffer(foundbuf, true);
+ goto start;
+ }
*foundPtr = TRUE;
+ /* Check to see if the correct data has been loaded into the buffer. */
if (!valid)
{
/*
* own read attempt if the page is still not BM_VALID.
* StartBufferIO does it all.
*/
- if (StartBufferIO(buf, true))
+ if (StartBufferIO(foundbuf, true))
{
/*
* If we get here, previous attempts to read the buffer must
}
}
- return buf;
+ return foundbuf;
}
- /*
- * Didn't find it in the buffer pool. We'll have to initialize a new
- * buffer. Remember to unlock the mapping lock while doing the work.
- */
- LWLockRelease(newPartitionLock);
-
/* Loop here in case we have to try another victim buffer */
for (;;)
{
*/
if (oldFlags & BM_TAG_VALID)
{
- /*
- * Need to compute the old tag's hashcode and partition lock ID.
- * XXX is it worth storing the hashcode in BufferDesc so we need
- * not recompute it here? Probably not.
- */
+ /* Save old tag. */
oldTag = buf->tag;
- oldHash = BufTableHashCode(&oldTag);
- oldPartitionLock = BufMappingPartitionLock(oldHash);
-
- /*
- * Must lock the lower-numbered partition first to avoid
- * deadlocks.
- */
- if (oldPartitionLock < newPartitionLock)
- {
- LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
- LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
- }
- else if (oldPartitionLock > newPartitionLock)
- {
- LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
- LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
- }
- else
- {
- /* only one partition, only one lock */
- LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
- }
- }
- else
- {
- /* if it wasn't valid, we need only the new partition */
- LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
- /* these just keep the compiler quiet about uninit variables */
- oldHash = 0;
- oldPartitionLock = 0;
}
/*
* Note that we have not yet removed the hashtable entry for the old
* tag.
*/
- buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+enter:
+ buf_id = BufTableInsert(&newTag, buf->buf_id);
if (buf_id >= 0)
{
+ BufferDesc *foundbuf;
+
/*
- * Got a collision. Someone has already done what we were about to
- * do. We'll just handle this as if it were found in the buffer
- * pool in the first place. First, give up the buffer we were
- * planning to use.
+ * We've got a collision, apparently: it looks like someone else
+ * did what we were about to do. We can handle this as if we had
+ * found the buffer in the pool in the first place, but we must
+ * recheck the buffer tag after pinning it, because it could still
+ * get renamed under us.
+ */
+ foundbuf = &BufferDescriptors[buf_id];
+ valid = PinBuffer(foundbuf, strategy);
+
+ /*
+ * If the tag no longer matches, the buffer was renamed before we
+ * pinned it: drop the pin and retry the insert. Only a matching tag
+ * is a real collision.
+ */
+ if (!BUFFERTAGS_EQUAL(newTag, foundbuf->tag))
+ {
+ UnpinBuffer(foundbuf, true);
+ goto enter;
+ }
+
+ /*
+ * Collision confirmed. Give up the buffer we were planning to
+ * use.
*/
UnpinBuffer(buf, true);
- /* Can give up that buffer's mapping partition lock now */
- if ((oldFlags & BM_TAG_VALID) &&
- oldPartitionLock != newPartitionLock)
- LWLockRelease(oldPartitionLock);
-
- /* remaining code should match code at top of routine */
-
- buf = &BufferDescriptors[buf_id];
-
- valid = PinBuffer(buf, strategy);
-
- /* Can release the mapping lock as soon as we've pinned it */
- LWLockRelease(newPartitionLock);
-
*foundPtr = TRUE;
if (!valid)
* then set up our own read attempt if the page is still not
* BM_VALID. StartBufferIO does it all.
*/
- if (StartBufferIO(buf, true))
+ if (StartBufferIO(foundbuf, true))
{
/*
* If we get here, previous attempts to read the buffer
}
}
- return buf;
+ return foundbuf;
}
/*
break;
UnlockBufHdr(buf);
- BufTableDelete(&newTag, newHash);
- if ((oldFlags & BM_TAG_VALID) &&
- oldPartitionLock != newPartitionLock)
- LWLockRelease(oldPartitionLock);
- LWLockRelease(newPartitionLock);
+ BufTableDelete(&newTag);
UnpinBuffer(buf, true);
}
UnlockBufHdr(buf);
if (oldFlags & BM_TAG_VALID)
- {
- BufTableDelete(&oldTag, oldHash);
- if (oldPartitionLock != newPartitionLock)
- LWLockRelease(oldPartitionLock);
- }
-
- LWLockRelease(newPartitionLock);
+ BufTableDelete(&oldTag);
/*
* Buffer contents are currently invalid. Try to get the io_in_progress
InvalidateBuffer(volatile BufferDesc *buf)
{
BufferTag oldTag;
- uint32 oldHash; /* hash value for oldTag */
- LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
/* Save the original buffer tag before dropping the spinlock */
oldTag = buf->tag;
- UnlockBufHdr(buf);
-
- /*
- * Need to compute the old tag's hashcode and partition lock ID. XXX is it
- * worth storing the hashcode in BufferDesc so we need not recompute it
- * here? Probably not.
- */
- oldHash = BufTableHashCode(&oldTag);
- oldPartitionLock = BufMappingPartitionLock(oldHash);
-
-retry:
-
- /*
- * Acquire exclusive mapping lock in preparation for changing the buffer's
- * association.
- */
- LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
-
- /* Re-lock the buffer header */
- LockBufHdr(buf);
-
- /* If it's changed while we were waiting for lock, do nothing */
- if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
- {
- UnlockBufHdr(buf);
- LWLockRelease(oldPartitionLock);
- return;
- }
-
/*
* We assume the only reason for it to be pinned is that someone else is
* flushing the page out. Wait for them to finish. (This could be an
* yet done StartBufferIO, WaitIO will fall through and we'll effectively
* be busy-looping here.)
*/
- if (buf->refcount != 0)
+ while (buf->refcount != 0)
{
UnlockBufHdr(buf);
- LWLockRelease(oldPartitionLock);
/* safety check: should definitely not be our *own* pin */
if (GetPrivateRefCount(buf->buf_id) > 0)
elog(ERROR, "buffer is pinned in InvalidateBuffer");
WaitIO(buf);
- goto retry;
+ LockBufHdr(buf);
+
+ /* If it's changed while we were waiting for lock, do nothing */
+ if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
+ {
+ UnlockBufHdr(buf);
+ return;
+ }
}
/*
* Remove the buffer from the lookup hashtable, if it was in there.
*/
if (oldFlags & BM_TAG_VALID)
- BufTableDelete(&oldTag, oldHash);
-
- /*
- * Done with mapping lock.
- */
- LWLockRelease(oldPartitionLock);
+ BufTableDelete(&oldTag);
/*
* Insert the buffer at the head of the list of free buffers.