From: Robert Haas
Date: Tue, 24 Jul 2012 16:30:41 +0000 (-0400)
Subject: An attempt at making insertion work.
X-Git-Url: http://waps.l3s.uni-hannover.de/gitweb/?a=commitdiff_plain;h=8cd0f78fd5919d677fda3ad33f61e25ca048bfa0;p=users%2Frhaas%2Fpostgres.git

An attempt at making insertion work.
---

diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c
index cb370dfcae..8caf516f9b 100644
--- a/src/backend/utils/hash/chash.c
+++ b/src/backend/utils/hash/chash.c
@@ -121,8 +121,12 @@ typedef struct
  */
 typedef struct
 {
-	CHashPtr		next;			/* arena offset of next element */
-	uint32			hashcode;		/* hash(key) */
+	CHashPtr		next;		/* arena offset of next element */
+	union
+	{
+		uint32		hashcode;	/* hash(key) */
+		CHashPtr	gcnext;		/* arena offset of next garbage item */
+	} un;
 } CHashNode;
 
 #define SizeOfCHashNode		MAXALIGN(sizeof(CHashNode))
@@ -176,6 +180,11 @@ typedef struct CHashTableData
 		MyProc->chash_bucket = 0; \
 	} while (0)
 
+/* Function prototypes. */
+static CHashPtr CHashAllocate(CHashTable table);
+static bool CHashRemoveMarked(CHashTable table, uint32 bucket,
+				  CHashPtr *cp, volatile CHashPtr *p);
+
 /*
  * First stage of CHashTable initialization.  We fill in all the constants
  * here, but not the pointers.
@@ -335,7 +344,7 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc)
 		CHashBucket *f = &table->freelist[i % table->nfreelists];
 		CHashNode *n = CHashTableGetNode(table, i);
 
-		n->next = f->head;
+		n->un.gcnext = f->head;
 		f->head = i;
 	}
 
@@ -361,7 +370,7 @@ CHashSearch(CHashTable table, void *entry)
 	uint32	hashcode = hash_any(entry, table->desc.key_size);
 	uint32	bucket = hashcode & table->bucket_mask;
 	CHashPtr	c;
-	CHashNode  *n;
+	volatile CHashNode  *n;
 	bool	found = false;
 
 	/* Suppress garbage collection for target bucket. */
@@ -377,18 +386,12 @@ CHashSearch(CHashTable table, void *entry)
 		if (c == InvalidCHashPtr)
 			break;
 
-		/*
-		 * A dependency barrier is needed after reading a pointer value and
-		 * before dereferencing it.  c is, in effect, a pointer which we're
-		 * about to deference.
-		 */
-		pg_read_barrier_depends();
-
 		/* Compare current node by hashcode, then by memcmp. */
 		n = CHashTableGetNode(table, CHashPtrGetOffset(c));
-		if (n->hashcode == hashcode)
+		pg_read_barrier_depends();
+		if (n->un.hashcode == hashcode)
 			cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
-		else if (n->hashcode > hashcode)
+		else if (n->un.hashcode > hashcode)
 			cmp = 1;
 		else
 			cmp = -1;
@@ -424,9 +427,6 @@ CHashSearch(CHashTable table, void *entry)
 		}
 	}
 
-	/* Done scanning bucket. */
-	CHashTableUnsuppressGC();
-
 	/* Return result to caller. */
 	return found;
 }
@@ -441,6 +441,109 @@ CHashSearch(CHashTable table, void *entry)
 bool
 CHashInsert(CHashTable table, void *entry)
 {
+	uint32	hashcode = hash_any(entry, table->desc.key_size);
+	uint32	bucket = hashcode & table->bucket_mask;
+	CHashPtr	new;
+	CHashPtr	c;
+	volatile CHashPtr *p;
+	volatile CHashNode *n;
+	volatile CHashNode *nnew;
+	bool		found = false;
+
+	/*
+	 * Allocate and initialize a new entry, on the assumption that the insert
+	 * will succeed.  If it ends up failing, we must be sure to put this back
+	 * on some free list, lest it be permanently leaked.
+	 */
+	new = CHashAllocate(table);
+	nnew = CHashTableGetNode(table, new);
+	nnew->un.hashcode = hashcode;
+	memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size);
+
+	/* Suppress garbage collection for target bucket. */
+	CHashTableSuppressGC(table, bucket);
+
+	/* Scan bucket. */
+retry:
+	p = &table->bucket[bucket].head;
+	c = *p;
+	for (;;)
+	{
+		int		cmp;
+
+		/*
+		 * We can't safely update a delete-marked pointer, so remove any
+		 * such pointers we find from the bucket chain.  Sometimes, concurrent
+		 * activity may force us to restart the whole scan, but that should
+		 * be rare.
+		 */
+		if (CHashPtrIsMarked(c) && CHashRemoveMarked(table, bucket, &c, p))
+			goto retry;
+
+		/* If we reach the end of the bucket chain, stop. */
+		if (c == InvalidCHashPtr)
+			break;
+
+		/* Compare current node by hashcode, then by memcmp. */
+		n = CHashTableGetNode(table, CHashPtrGetOffset(c));
+		pg_read_barrier_depends();
+		if (n->un.hashcode == hashcode)
+			cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
+		else if (n->un.hashcode > hashcode)
+			cmp = 1;
+		else
+			cmp = -1;
+
+		/* If we found the key, or passed where it should be, we're done. */
+		if (cmp >= 0)
+		{
+			found = (cmp == 0);
+			break;
+		}
+
+		/* Move to next node. */
+		p = &n->next;
+		c = *p;
+	}
+
+	if (!found)
+	{
+		bool	fail = false;
+
+		/* If we didn't find the key, try to insert. */
+		nnew->next = c;
+		SpinLockAcquire(&table->bucket[bucket].mutex);
+		if (*p == c)
+			*p = new;
+		else
+			fail = true;
+		SpinLockRelease(&table->bucket[bucket].mutex);
+
+		/*
+		 * If we failed, it means that somebody concurrently inserted or
+		 * deleted an element.  The correct insertion point might have changed,
+		 * or the key we're trying to insert might now be present when it
+		 * wasn't before, so we'll have to search the bucket chain anew.
+		 */
+		if (fail)
+			goto retry;
+	}
+	else
+	{
+		/*
+		 * Key exists; return its value to the caller.  XXX. The node
+		 * allocated above is never put back on a free list: it leaks.
+		 */
+		memcpy(((char *) entry) + table->desc.key_size,
+			   CHashNodeGetItem(n) + table->desc.key_size,
+			   table->desc.element_size - table->desc.key_size);
+	}
+
+	/* Done scanning bucket. */
+	CHashTableUnsuppressGC();
+
+	/* The insert succeeded if and only if no duplicate was found. */
+	return !found;
 }
 
 /*
@@ -503,6 +606,7 @@ CHashAllocate(CHashTable table)
 		if (vtable->garbage[table->gc_next].head != InvalidCHashPtr)
 		{
 			volatile CHashBucket *b = &vtable->freelist[f_current];
+			volatile CHashNode *n;
 			uint32		i;
 			uint64		chash_bucket;
 
@@ -515,6 +619,10 @@ CHashAllocate(CHashTable table)
 			/* Anything to recycle? */
+			if (garbage != InvalidCHashPtr)
+			{
+				CHashPtr	fhead;
+				CHashPtr	fcurrent;
+				CHashPtr	fnext;
+
 				/*
 				 * Be certain that the writes associated with popping the
 				 * garbage list are complete before we start checking whether
@@ -545,8 +653,101 @@ CHashAllocate(CHashTable table)
 				 */
 				pg_memory_barrier();
 
-				/* XXX. Recycle garbage here! */
+				/* Remove one item from list to satisfy current allocation. */
+				new = garbage;
+				n = CHashTableGetNode(table, CHashPtrGetOffset(new));
+				fhead = n->un.gcnext;
+
+				/* If that's all there was, we're done. */
+				if (fhead == InvalidCHashPtr)
+					return new;
+
+				/* Walk list of reclaimed elements to end. */
+				fcurrent = fhead;
+				for (;;)
+				{
+					n = CHashTableGetNode(table, CHashPtrGetOffset(fcurrent));
+					fnext = n->un.gcnext;
+					if (fnext == InvalidCHashPtr)
+						break;
+					fcurrent = fnext;
+				}
+
+				/* Push reclaimed elements onto home free list. */
+				b = &vtable->freelist[f_current];
+				SpinLockAcquire(&b->mutex);
+				n->un.gcnext = b->head;
+				b->head = fhead;
+				SpinLockRelease(&b->mutex);
+
+				/* Return the element we saved for ourselves. */
+				return new;
 			}
 		}
 	}
 }
+
+/*
+ * Attempt to remove marked elements from a bucket chain.
+ *
+ * p is a pointer into shared memory; it points to the CHashPtr that must be
+ * updated to remove deleted elements from the chain.
+ *
+ * cp is a pointer into backend-private memory; it is the delete-marked pointer
+ * fetched from the node to which p points.
+ *
+ * The return value is true if the caller must retry, or false if the caller
+ * may continue the scan.  In the latter case, *cp is updated to contain a
+ * pointer to the node from which the scan should be resumed.
+ */
+static bool
+CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
+				  volatile CHashPtr *p)
+{
+	CHashPtr	c = *cp;
+	CHashPtr	cc;
+	bool		retry_needed = false;
+
+	do
+	{
+		volatile CHashNode *n;
+
+		/*
+		 * c is logically a pointer, so we must insert a dependency barrier
+		 * before dereferencing it.
+		 */
+		pg_read_barrier_depends();
+
+		/* Read next-pointer of deleted node. */
+		n = CHashTableGetNode(table, CHashPtrGetOffset(c));
+		cc = n->next;
+
+		/*
+		 * Redirect next-pointer of prior node to next-pointer of deleted
+		 * node, unless someone else has meanwhile modified the bucket
+		 * chain.
+		 */
+		SpinLockAcquire(&table->bucket[bucket].mutex);
+		if (*p == c)
+			*p = cc;
+		else
+			retry_needed = true;
+		SpinLockRelease(&table->bucket[bucket].mutex);
+
+		/*
+		 * If we failed to update the logical pointer, caller must rescan
+		 * the bucket.  There's no intelligent way to continue the scan,
+		 * because for all we know the node that contains the pointer we're
+		 * trying to update may itself be deleted by now.
+		 */
+		if (retry_needed)
+			return true;
+
+		/* The new target of the pointer may also be delete-marked, so loop. */
+		c = cc;
+	} while (CHashPtrIsMarked(c));
+
+	/* Success! */
+	*cp = c;
+	return false;
+}