From 02fa9b1ce189a3159d15f262d66b6e4d2c8f2695 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Sat, 14 Jul 2012 01:44:22 -0400 Subject: [PATCH] Start of work on GC. --- src/backend/utils/hash/chash.c | 164 +++++++++++++++++++++++++++++---- 1 file changed, 148 insertions(+), 16 deletions(-) diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c index 50fdafca43..0bcd576cd5 100644 --- a/src/backend/utils/hash/chash.c +++ b/src/backend/utils/hash/chash.c @@ -142,6 +142,8 @@ typedef struct CHashTableData uint32 bucket_mask; /* # of buckets, minus one */ uint32 garbage_shift; /* log2(nbuckets/ngarbage) */ uint32 ngarbage; /* # of garbage lists, a power of two */ + uint32 gc_next; /* next garbage list to reclaim */ + int gc_pid; /* PID that set gc_next */ uint32 nfreelists; /* # of freelists */ uint32 arena_limit; /* # of arena elements */ uint32 arena_stride; /* bytes allocated per arena element */ @@ -155,6 +157,25 @@ typedef struct CHashTableData (AssertMacro((offset) < (table)->arena_limit), \ (CHashNode *) ((table)->arena + (table)->arena_stride * (offset))) +/* + * We need a memory barrier to make sure that our bucket advertisement is + * committed to memory before we begin scanning the bucket, and another one + * to make sure the scan is done before we cease advertising it. + */ +#define CHashTableSuppressGC(table, bno) \ + do { \ + Assert(MyProc->chash_bucket == 0); \ + MyProc->chash_bucket = \ + ((uint64) table->desc.id)<<32 | (bucket>>table->garbage_shift); \ + pg_memory_barrier(); \ + } while (0) +#define CHashTableUnsuppressGC() \ + do { \ + Assert(MyProc->chash_bucket != 0); \ + pg_memory_barrier(); \ + MyProc->chash_bucket = 0; \ + } while (0) + /* * First stage of CHashTable initialization. We fill in all the constants * here, but not the pointers. @@ -195,6 +216,8 @@ CHashBootstrap(CHashDescriptor *desc) */ table->garbage_shift = Min(bucket_shift, 6); table->ngarbage = table->nbuckets >> table->garbage_shift; + table->gc_next = 0; + table->gc_pid = 0; /* * The number of freelists must be large enough to avoid contention; @@ -341,13 +364,8 @@ CHashSearch(CHashTable table, void *entry) CHashNode *n; bool found = false; - /* - * Suppress garbage collection for target bucket. We need a memory - * barrier to make sure that our bucket advertisement is committed to - * memory before we begin scanning the bucket. - */ - MyProc->chash_bucket = ((uint64) table->desc.id) << 32 | bucket; - pg_memory_barrier(); + /* Suppress garbage collection for target bucket. */ + CHashTableSuppressGC(table, bucket); /* Scan bucket. */ c = table->bucket[bucket].head; @@ -367,7 +385,7 @@ CHashSearch(CHashTable table, void *entry) pg_read_barrier_depends(); /* Compare current node by hashcode, then by memcmp. */ - n = CHashTableGetNode(table, c); + n = CHashTableGetNode(table, CHashPtrGetOffset(c)); if (n->hashcode == hashcode) cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size); else if (n->hashcode > hashcode) @@ -406,15 +424,129 @@ CHashSearch(CHashTable table, void *entry) } } - /* - * Once we're done scanning the bucket, we can permit garbage collection - * of that bucket to resume. The memory barrier ensures that all the reads - * that are part of the bucket scan finish before we allow garbage - * collection. - */ - pg_memory_barrier(); - MyProc->chash_bucket = 0; + /* Done scanning bucket. */ + CHashTableUnsuppressGC(); /* Return result to caller. */ return found; } + +/* + * Insert into a concurrent hash table. entry should be the complete entry + * to be inserted. If no duplicate entry is found in the table, this function + * will insert the entry and return true. Otherwise, the duplicate entry's + * value will be copied into the supplied entry and the function will return + * false. + */ +bool +CHashInsert(CHashTable table, void *entry) +{ +} + +/* + * Allocate an arena slot for a new item to be inserted into a hash table. + * + * We don't want to wait until every single free-list is completely empty + * before beginning to garbage collect, because that could have undesirable + * latency characteristics, and might possibly render free-lists thoroughly + * worthless from the point of view of contention avoidance. Instead, we + * check free lists and garbage lists in alternation. If we find a non-empty + * free list, we allocate from it; if we find a non-empty garbage list, we + * garbage collect it and put the contents on our free list. + * + * We always start with the same free list - which one is based on our + * backend ID - but we try to round-robin among all the garbage lists. + */ +static CHashPtr +CHashAllocate(CHashTable table) +{ + uint32 f_home = ((uint32) MyBackendId) % table->nfreelists; + uint32 f_current = f_home; + CHashPtr new; + CHashPtr garbage; + volatile CHashTable vtable = table; + + /* If this process hasn't initialized gc_next yet, do that now. */ + if (table->gc_pid != MyProcPid) + { + table->gc_pid = MyProcPid; + table->gc_next = ((uint32) MyProcPid) % table->ngarbage; + } + + /* Loop until we allocate a buffer. */ + for (;;) + { + /* + * Check one freelist for an available arena slot. To minimize + * spinlock traffic, we do an unlocked test first. We must recheck + * after acquiring the spinlock. + */ + if (vtable->freelist[f_current].head != InvalidCHashPtr) + { + SpinLockAcquire(&vtable->freelist[f_current].mutex); + new = vtable->freelist[f_current].head; + if (new != InvalidCHashPtr) + { + CHashNode *n = CHashTableGetNode(table, new); + vtable->freelist[f_current].head = n->next; + SpinLockRelease(&vtable->freelist[f_current].mutex); + return new; + } + SpinLockRelease(&vtable->freelist[f_current].mutex); + } + + /* + * Check the next garbage list for recyclable buffers. If we + * find any, try to garbage collect them. + */ + table->gc_next = (table->gc_next + 1) % table->ngarbage; + if (vtable->garbage[table->gc_next].head != InvalidCHashPtr) + { + volatile CHashBucket *b = &vtable->freelist[f_current]; + uint32 i; + uint64 chash_bucket; + + /* Pop garbage off list. */ + SpinLockAcquire(&b->mutex); + garbage = b->head; + b->head = InvalidCHashPtr; + SpinLockRelease(&b->mutex); + + /* Anything to recycle? */ + if (garbage != InvalidCHashPtr) + { + /* + * Be certain that the writes associated with popping the + * garbage list are complete before we start checking whether + * the garbage is recycleable. + */ + pg_memory_barrier(); + + /* + * Spin until garbage is recyclable. We could have a "soft" + * version of this that merely requeues the garbage if it's not + * immediately recycleable, but it's not clear that we need + * such a thing. On the flip side we might want to eventually + * enter a longer sleep here, or PANIC, but it's not clear + * exactly how to calibrate that, either. + */ + chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next; + for (i = 0; i < ProcGlobal->allProcCount; i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + + while (proc->chash_bucket == chash_bucket) + ; + } + + /* + * Be certain that all prior reads are done before starting + * the next batch of writes. + */ + pg_memory_barrier(); + + /* XXX. Recycle garbage here! */ + } + } + } +} -- 2.39.5