uint32 nfreelists; /* # of freelists */
uint32 arena_limit; /* # of arena elements */
uint32 arena_stride; /* bytes allocated per arena element */
- CHashBucket *bucket; /* array of size nbuckets */
- CHashBucket *garbage; /* array of size ngarbage */
- CHashBucket *freelist; /* array of size nfreelists */
+ CHashPtr *bucket; /* array of size nbuckets */
+ CHashPtr *garbage; /* array of size ngarbage */
+ CHashPtr *freelist; /* array of size nfreelists */
char *arena; /* arena */
} CHashTableData;
static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c);
static void CHashImmediateFree(CHashTable table, CHashPtr c);
static bool CHashRemoveMarked(CHashTable table, uint32 bucket,
- CHashPtr *cp, volatile CHashPtr *p);
+ CHashPtr *cp, CHashPtr *p);
/*
* First stage of CHashTable initialization. We fill in all the constants
total_buckets = add_size(total_buckets, table->nfreelists);
size = MAXALIGN(sizeof(CHashTableData));
- size = add_size(size, mul_size(sizeof(CHashBucket), total_buckets));
+ size = add_size(size, mul_size(sizeof(CHashPtr), total_buckets));
size = add_size(size, mul_size(table->arena_stride, table->arena_limit));
return size;
Assert(!found);
/* Bucket, garbage, and freelist arrays follow table info. */
- table->bucket = (CHashBucket *)
+ table->bucket = (CHashPtr *)
(((char *) shmem) + MAXALIGN(sizeof(CHashTableData)));
table->garbage = &table->bucket[table->nbuckets];
table->freelist = &table->garbage[table->ngarbage];
/* Initialize all three sets of lists to empty. */
for (i = 0; i < table->nbuckets; ++i)
- table->bucket[i].head = InvalidCHashPtr;
+ table->bucket[i] = InvalidCHashPtr;
for (i = 0; i < table->ngarbage; ++i)
- table->garbage[i].head = InvalidCHashPtr;
+ table->garbage[i] = InvalidCHashPtr;
for (i = 0; i < table->nfreelists; ++i)
- table->freelist[i].head = InvalidCHashPtr;
+ table->freelist[i] = InvalidCHashPtr;
/* Put all arena elements on the free lists. */
for (i = 0; i < table->arena_limit; ++i)
{
- CHashBucket *f = &table->freelist[i % table->nfreelists];
- CHashNode *n = CHashTableGetRaw(table, i);
+ CHashPtr *f = &table->freelist[i % table->nfreelists];
+ CHashNode *n = CHashTableGetRaw(table, i);
- n->un.gcnext = f->head;
- f->head = MakeCHashPtr(i);
+ n->un.gcnext = *f;
+ *f = MakeCHashPtr(i);
}
/*
uint32 hashcode = hash_any(entry, table->desc.key_size);
uint32 bucket = hashcode & table->bucket_mask;
CHashPtr c;
- volatile CHashNode *n;
+ CHashNode *n;
bool found = false;
int cmp = 1;
CHashTableSuppressGC(table, bucket);
/* Scan bucket. */
- c = table->bucket[bucket].head;
+ c = table->bucket[bucket];
while (c != InvalidCHashPtr)
{
uint32 h;
uint32 bucket = hashcode & table->bucket_mask;
CHashPtr new;
CHashPtr c;
- volatile CHashPtr *p;
- volatile CHashNode *n;
- volatile CHashNode *nnew;
+ CHashPtr *p;
+ CHashNode *n;
+ CHashNode *nnew;
bool found = false;
/*
/* Scan bucket. */
retry:
- p = &table->bucket[bucket].head;
+ p = &table->bucket[bucket];
c = *p;
while (c != InvalidCHashPtr)
{
uint32 hashcode = hash_any(entry, table->desc.key_size);
uint32 bucket = hashcode & table->bucket_mask;
CHashPtr c;
- volatile CHashPtr *p;
- volatile CHashNode *n;
+ CHashPtr *p;
+ CHashNode *n;
bool found = false;
/* Suppress garbage collection for target bucket. */
/* Scan bucket. */
retry:
- p = &table->bucket[bucket].head;
+ p = &table->bucket[bucket];
c = *p;
while (c != InvalidCHashPtr)
{
* Allocate an arena slot for a new item to be inserted into a hash table.
*
* We don't want to wait until every single free-list is completely empty
- * before beginning to garbage collect, because that could have undesirable
- * latency characteristics, and might possibly render free-lists thoroughly
- * worthless from the point of view of contention avoidance. Instead, we
- * check free lists and garbage lists in alternation. If we find a non-empty
- * free list, we allocate from it; if we find a non-empty garbage list, we
- * garbage collect it and put the contents on our free list.
+ * before beginning to garbage collect, because that could result in very
+ * fast allocation followed by a storm of garbage collection activity.
+ * It could also lead to every inserting backend ganging up on the only
+ * non-empty freelist.
*
+ * To avoid that, we check free lists and garbage lists in alternation.
* We always start with the same free list - which one is based on our
- * backend ID - but we try to round-robin among all the garbage lists.
+ * backend ID - but we try to round-robin over all the available garbage
+ * lists. Whenever we successfully garbage collect, we put the recovered
+ * items on our own free list. In this way, if there's only one backend
+ * active, it will typically find a free buffer in the first place it looks:
+ * its own free list. It will also settle into a pattern of garbage
+ * collecting the garbage list which it has visited least recently, which
+ * is what we want.
*/
static CHashPtr
CHashAllocate(CHashTable table)
/* Loop until we allocate a buffer. */
for (;;)
{
- volatile CHashBucket *b;
+ CHashPtr *b;
- /*
- * Attempt to pop a buffer from a freelist using compare-and-swap.
- */
+ /* Try to pop a buffer from a freelist using compare-and-swap. */
b = &table->freelist[f_current];
- new = b->head;
+ new = *b;
if (new != InvalidCHashPtr)
{
- volatile CHashNode *n;
+ CHashNode *n = CHashTableGetNode(table, new);
- n = CHashTableGetNode(table, new);
+ /*
+ * n is computed from table->freelist[f_current], which could
+ * be modified by concurrent activity, so we need a dependency
+ * barrier here.
+ */
pg_read_barrier_depends();
- if (__sync_bool_compare_and_swap(&b->head, new, n->un.gcnext))
+ if (__sync_bool_compare_and_swap(b, new, n->un.gcnext))
return new;
}
- /*
- * Check the next garbage list for recyclable buffers. If we
- * find any, try to garbage collect them.
- */
+ /* If next garbage list is non-empty, empty it via compare-and-swap. */
table->gc_next = (table->gc_next + 1) % table->ngarbage;
b = &table->garbage[table->gc_next];
- garbage = b->head;
+ garbage = *b;
if (garbage != InvalidCHashPtr &&
- __sync_bool_compare_and_swap(&b->head, garbage, InvalidCHashPtr))
+ __sync_bool_compare_and_swap(b, garbage, InvalidCHashPtr))
{
- CHashPtr fhead;
- CHashPtr fcurrent;
- CHashPtr fnext;
- CHashPtr oldhead;
uint64 chash_bucket;
uint32 i;
- volatile CHashNode *n;
-
- /*
- * Be certain that the writes associated with popping the
- * garbage list are complete before we start checking whether
- * the garbage is recycleable.
- */
- pg_memory_barrier();
+ CHashPtr fhead;
+ CHashNode *n;
/*
- * Spin until garbage is recyclable. We could have a "soft"
- * version of this that merely requeues the garbage if it's not
- * immediately recycleable, but it's not clear that we need
- * such a thing. On the flip side we might want to eventually
- * enter a longer sleep here, or PANIC, but it's not clear
- * exactly how to calibrate that, either.
+ * Spin until garbage is recyclable. We can't begin this operation
+ * until the clearing of the garbage list has been committed to
+ * memory, but since that was done using an atomic operation no
+ * explicit barrier is needed here.
+ *
+ * Note: We could have a "soft" version of this that merely
+ * requeues the garbage if it's not immediately recyclable, but
+ * it's not clear that we need such a thing. On the flip side we
+ * might want to eventually enter a longer sleep here, or PANIC,
+ * but it's not clear exactly how to calibrate that.
*/
chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next;
for (i = 0; i < ProcGlobal->allProcCount; i++)
;
}
- /*
- * Be certain that all prior reads are done before starting
- * the next batch of writes.
- */
- pg_memory_barrier();
-
/* Remove one item from list to satisfy current allocation. */
new = garbage;
n = CHashTableGetNode(table, new);
fhead = n->un.gcnext;
- /* If that's all there was, we're done. */
- if (fhead == InvalidCHashPtr)
- return new;
-
- /* Walk list of reclaimed elements to end. */
- fcurrent = fhead;
- for (;;)
+ /* Put any remaining elements back on the free list. */
+ if (fhead != InvalidCHashPtr)
{
- n = CHashTableGetNode(table, fcurrent);
- fnext = n->un.gcnext;
- if (fnext == InvalidCHashPtr)
- break;
- fcurrent = fnext;
+ CHashPtr fcurrent;
+ CHashPtr fnext;
+ CHashPtr oldhead;
+
+ /* Walk list of reclaimed elements to end. */
+ fcurrent = fhead;
+ for (;;)
+ {
+ n = CHashTableGetNode(table, fcurrent);
+ fnext = n->un.gcnext;
+ if (fnext == InvalidCHashPtr)
+ break;
+ fcurrent = fnext;
+ }
+
+ /*
+ * Push reclaimed elements onto home free list. n is the last node
+ * of the reclaimed chain: point it at the current list head, then
+ * retry until the compare-and-swap installs fhead as the new head.
+ * The loop must repeat on CAS *failure*, as the other push loops do.
+ */
+ b = &table->freelist[f_home];
+ do
+ {
+ oldhead = *b;
+ n->un.gcnext = oldhead;
+ } while (!__sync_bool_compare_and_swap(b, oldhead, fhead));
}
- /* Push reclaimed elements onto home free list. */
- b = &table->freelist[f_home];
- do
- {
- oldhead = b->head;
- n->un.gcnext = oldhead;
- } while (__sync_bool_compare_and_swap(&b->head, oldhead, fhead));
-
/* Return the element we saved for ourselves. */
return new;
}
{
uint32 garbage_bucket;
CHashPtr g;
- volatile CHashNode *n;
- volatile CHashBucket *garbage;
+ CHashNode *n;
+ CHashPtr *garbage;
garbage_bucket = bucket >> table->garbage_shift;
n = CHashTableGetNode(table, c);
do
{
- g = garbage->head;
+ g = *garbage;
n->un.gcnext = g;
- } while (!__sync_bool_compare_and_swap(&garbage->head, g, c));
+ } while (!__sync_bool_compare_and_swap(garbage, g, c));
}
/*
static void
CHashImmediateFree(CHashTable table, CHashPtr c)
{
- volatile CHashNode *n;
- volatile CHashBucket *free;
+ CHashNode *n;
+ CHashPtr *free;
uint32 f_home;
CHashPtr f;
do
{
- f = free->head;
+ f = *free;
n->un.gcnext = f;
- } while (!__sync_bool_compare_and_swap(&free->head, f, c));
+ } while (!__sync_bool_compare_and_swap(free, f, c));
}
/*
*/
static bool
CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
- volatile CHashPtr *p)
+ CHashPtr *p)
{
CHashPtr c = *cp;
CHashPtr cc;
do
{
- volatile CHashNode *n;
+ CHashNode *n;
/*
* c is logically a pointer, so we must insert a dependency barrier