(AssertMacro((ptr) != InvalidCHashPtr), \
CHashTableGetRaw((table), CHashPtrGetOffset((ptr))))
+/*
+ * Bucket scan.
+ */
+typedef struct
+{
+ CHashPtr target;
+ CHashPtr *pointer_to_target;
+ CHashNode *target_node;
+ bool found;
+} CHashScanResult;
+
/*
* We need a memory barrier to make sure that our bucket advertisement is
* committed to memory before we begin scanning the bucket, and another one
static CHashPtr CHashAllocate(CHashTable table);
static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c);
static void CHashImmediateFree(CHashTable table, CHashPtr c);
-static bool CHashRemoveMarked(CHashTable table, uint32 bucket,
- CHashPtr *cp, CHashPtr *p);
+static void CHashBucketScan(CHashTable table,
+ CHashPtr *start,
+ uint32 hashcode,
+ const void *key,
+ CHashScanResult *res);
/*
* First stage of CHashTable initialization. We fill in all the constants
{
uint32 hashcode = hash_any(entry, table->desc.key_size);
uint32 bucket = hashcode & table->bucket_mask;
- CHashPtr c;
- CHashNode *n;
- bool found = false;
- int cmp = 1;
+ CHashScanResult scan;
- /* Suppress garbage collection for target bucket. */
+ /* Prevent garbage collection for this bucket. */
CHashTableSuppressGC(table, bucket);
- /* Scan bucket. */
- c = table->bucket[bucket];
- while (c != InvalidCHashPtr)
- {
- uint32 h;
-
- /* Compare current node by hashcode, then by memcmp. */
- n = CHashTableGetNode(table, c);
- pg_read_barrier_depends();
- h = n->un.hashcode;
- if (h == hashcode)
- cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
- else if (h > hashcode)
- cmp = 1;
- else
- cmp = -1;
-
- /* Stop if we've passed the list position where the entry should be. */
- if (cmp >= 0)
- break;
-
- /* Fetch next-pointer. */
- c = n->next;
- }
-
- /*
- * Is it the item we're looking for?
- *
- * If the pointer is marked, it will be followed (if at all) by
- * a node which is "later" than this one in terms of either
- * hashcode or memcmp ordering; we won't find a duplicate of the
- * current key. This is because a marked CHashPtr is never
- * further updated (or at least, not until after garbage
- * collection, which we've already prevented for this bucket),
- * so the successor element must have been present before this
- * one was deleted.
- */
- if (cmp == 0 && !CHashPtrIsMarked(n->next))
- {
+ /* Scan bucket and return data from any matching entry. */
+ CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
+ if (scan.found)
memcpy(((char *) entry) + table->desc.key_size,
- CHashNodeGetItem(n) + table->desc.key_size,
+ CHashNodeGetItem(scan.target_node) + table->desc.key_size,
table->desc.element_size - table->desc.key_size);
- found = true;
- }
- /* We're done. */
+ /* Allow garbage collection for this bucket. */
CHashTableUnsuppressGC();
- return found;
+
+ return scan.found;
}
/*
uint32 hashcode = hash_any(entry, table->desc.key_size);
uint32 bucket = hashcode & table->bucket_mask;
CHashPtr new;
- CHashPtr c;
- CHashPtr *p;
- CHashNode *n;
CHashNode *nnew;
- bool found = false;
+ CHashScanResult scan;
/*
* Allocate and initialize a new entry, on the assumption that the insert
nnew->un.hashcode = hashcode;
memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size);
- /* Suppress garbage collection for target bucket. */
+ /* Prevent garbage collection for this bucket. */
CHashTableSuppressGC(table, bucket);
- /* Scan bucket. */
+ /*
+ * Scan the bucket. If we don't find a match, use compare-and-swap to
+ * insert the new node at the insert position. If we do find a match,
+ * return the data to the caller.
+ */
retry:
- p = &table->bucket[bucket];
- c = *p;
- while (c != InvalidCHashPtr)
- {
- int cmp;
- uint32 h;
-
- /* Compare current node by hashcode, then by memcmp. */
- n = CHashTableGetNode(table, c);
- pg_read_barrier_depends();
- h = n->un.hashcode;
- if (h == hashcode)
- cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
- else if (h > hashcode)
- cmp = 1;
- else
- cmp = -1;
-
- /* If we found the key, or passed where it should be, we're done. */
- if (cmp >= 0)
- {
- found = (cmp == 0);
- break;
- }
-
- /* Move to next node. */
- p = &n->next;
- c = *p;
-
- /*
- * We can't safely update a delete-marked pointer, so remove any
- * such pointers we find from the bucket chain. Sometimes, concurrent
- * activity may force us to restart the whole scan, but that should
- * be rare.
- */
- if (CHashPtrIsMarked(c) && CHashRemoveMarked(table, bucket, &c, p))
- goto retry;
- }
-
- if (!found)
- {
- /*
- * If we fail, it means that somebody concurrently inserted or
- * deleted an element. The correct insertion point might have changed,
- * or the key we're trying to insert might now be present when it
- * wasn't before, so we'll have to search the bucket chain anew.
- */
- nnew->next = c;
- if (!__sync_bool_compare_and_swap(p, c, new))
- goto retry;
- }
+ CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
+ if (scan.found)
+ memcpy(((char *) entry) + table->desc.key_size,
+ CHashNodeGetItem(scan.target_node) + table->desc.key_size,
+ table->desc.element_size - table->desc.key_size);
else
{
/*
- * If we did find the key, return the corresponding value to the
- * caller.
+ * We didn't find a matching element, so use compare-and-swap to
+ * attempt to insert the new element we've prepared. If this fails,
+ * someone currently inserted or deleted an element. The correct
+ * insertion point might have changed, or the key we're trying to
+ * insert might now be present when it wasn't before, so we'll have
+ * to search the bucket chain anew.
+ *
+ * There is a risk of starvation here, but we hope it will not happen
+ * in practice. Contention for inserting new elements should be
+ * spread out pretty much evenly across N+M possible insertion points,
+ * where N is the number of buckets and M is the number of elements
+ * in the table. Even for a quite modestly size table this is likely
+ * to exceed the number of CPU cores.
*/
- memcpy(((char *) entry) + table->desc.key_size,
- CHashNodeGetItem(n) + table->desc.key_size,
- table->desc.element_size - table->desc.key_size);
+ nnew->next = scan.target;
+ if (!__sync_bool_compare_and_swap(scan.pointer_to_target,
+ scan.target, new))
+ goto retry;
}
- /* Done scanning bucket. */
+ /* Allow garbage collection for this bucket. */
CHashTableUnsuppressGC();
/* If the insert failed, free the entry we allocated. */
- if (found)
+ if (scan.found)
CHashImmediateFree(table, new);
/* The insert succeeded if and only if no duplicate was found. */
- return !found;
+ return !scan.found;
}
/*
{
uint32 hashcode = hash_any(entry, table->desc.key_size);
uint32 bucket = hashcode & table->bucket_mask;
- CHashPtr c;
- CHashPtr *p;
- CHashNode *n;
- bool found = false;
+ bool cleanup_scan = false;
+ CHashScanResult scan;
- /* Suppress garbage collection for target bucket. */
+ /* Prevent garbage collection for this bucket. */
CHashTableSuppressGC(table, bucket);
/* Scan bucket. */
-retry:
- p = &table->bucket[bucket];
- c = *p;
- while (c != InvalidCHashPtr)
- {
- int cmp;
- uint32 h;
+ CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
- /* Compare current node by hashcode, then by memcmp. */
- n = CHashTableGetNode(table, c);
- pg_read_barrier_depends();
- h = n->un.hashcode;
- if (h == hashcode)
- cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size);
- else if (h > hashcode)
- cmp = 1;
- else
- cmp = -1;
+ /*
+ * If we found a match, then loop until we succesfully delete-mark the
+ * target, or until someone else does so. There's no real risk of
+ * starvation here; the maximum number of times we can iterate here is
+ * equal to the number of backends other than this one, and we'd have
+ * to be very unlucky to have things turn out even that badly.
+ */
+ while (scan.found)
+ {
+ CHashPtr next = scan.target_node->next;
- /* If we found the key, or passed where it should be, we're done. */
- if (cmp >= 0)
+ if (CHashPtrIsMarked(scan.target))
+ {
+ /* Someone else deleted it before we did. */
+ scan.found = false;
+ }
+ else if (__sync_bool_compare_and_swap(&scan.target_node,
+ next,
+ CHashPtrMark(next)))
{
- found = (cmp == 0);
+ /* Deletion is done; attempt to remove node from list. */
+ if (__sync_bool_compare_and_swap(scan.pointer_to_target,
+ scan.target,
+ next))
+ CHashAddToGarbage(table, bucket, scan.target);
+ else
+ cleanup_scan = true;
break;
}
+ }
+
+ if (cleanup_scan)
+ {
+ /* XXX */
+ }
- /* Move to next node. */
- p = &n->next;
- c = *p;
+ /* Allow garbage collection for this bucket. */
+ CHashTableUnsuppressGC();
+
+ /* We're done. */
+ CHashTableUnsuppressGC();
+ return scan.found;
+}
+
+/*
+ * Scan one bucket of a concurrent hash table, storing the results in a
+ * CHashResult object provided by the caller.
+ *
+ * Caller must suppress garbage collection for the target bucket before
+ * calling this function, and resume it afterwards.
+ *
+ * If a matching item is found, res->target will be its arena offset and
+ * res->found will be true. If not, res->target will be the arena offset of
+ * the item following the insert position and found will be false. In either
+ * case, res->pointer_to_target will a pointer to the address where the value
+ * of target was found. res->target_node will be a pointer to the address of
+ * the CHashNode with offset res->target, unless res->target is invalid.
+ */
+static void
+CHashBucketScan(CHashTable table,
+ CHashPtr *start,
+ uint32 hashcode,
+ const void *key,
+ CHashScanResult *res)
+{
+ CHashPtr target;
+ CHashPtr *pointer_to_target;
+ CHashNode *target_node = NULL;
+
+retry:
+ pointer_to_target = start;
+ target = *pointer_to_target;
+ for (;;)
+ {
+ CHashPtr next;
+ uint32 h;
+ int cmp;
/*
- * We can't safely update a delete-marked pointer, so remove any
- * such pointers we find from the bucket chain. Sometimes, concurrent
- * activity may force us to restart the whole scan, but that should
- * be rare.
+ * Read arena offset of target. If we've reached the end of the
+ * bucket chain, stop; otherwise, figure out the actual address of
+ * the next item.
*/
- if (CHashPtrIsMarked(c) && CHashRemoveMarked(table, bucket, &c, p))
- goto retry;
- }
+ if (target == InvalidCHashPtr)
+ {
+ res->found = false;
+ break;
+ }
+ target_node = CHashTableGetNode(table, target);
- if (found)
- {
- CHashPtr cc;
+ /*
+ * target may have been fetched from an arena entry that could be
+ * concurrently modified, so a dependency barrier is required before
+ * dereferencing the derived pointer.
+ */
+ pg_read_barrier_depends();
+ next = target_node->next;
/*
- * Really do the deletion. Since we've held no lock up to this
- * point, it may well be that someone else has deleted the item out
- * from under us, so we recheck that after taking the lock.
+ * For simplicity, any bucket scan, even if it's servicing a search,
+ * will attempt to remove delete-marked items from the bucket. This
+ * ensures that delete-marked elements are removed from bucket chains
+ * as quickly as possible and reduces code duplication. See
+ * CHashDelete for further comments about why delete-marking is
+ * necessary and how it allows safe deletion.
*/
- do
+ if (CHashPtrIsMarked(next))
{
- cc = n->next;
- if (CHashPtrIsMarked(cc))
+zap:
+ if (__sync_bool_compare_and_swap(pointer_to_target, target, next))
{
- found = false;
- break;
+ /*
+ * No one else can possibly have modified target_node->next,
+ * because such modifications are not allowed after a
+ * delete-mark has been applied. Thus, if we just keep
+ * following the next pointers, we're guaranteed to visit
+ * all non-deleted items (and possibly some deleted items)
+ * that were present at the time we began the scan.
+ */
+ /* XXX We must put target on a garbage list now! */
+ target = next;
+ continue;
}
- } while (!__sync_bool_compare_and_swap(&n->next,
- cc, CHashPtrMark(cc)));
+ else
+ {
+ /*
+ * If the previous node has been delete-marked, we can't
+ * further update the next-pointer, so restart the scan.
+ * Otherwise, we know that some other backend removed one
+ * or more deleted nodes from the bucket chain just as we
+ * were trying to do, and we can simply continue the scan
+ * from wherever the previous node is pointing now. It's
+ * possible that some concurrent inserts have happened, too,
+ * but that's OK; we can view those as having happened "before"
+ * whatever operation this scan is supporting.
+ *
+ * Although starvation is a theoretical possibility here if
+ * we are forced to retry repeatedly, even a single retry is
+ * vanishingly unlikely in practice. It requires some other
+ * backend to delete both the node that we're looking at and
+ * the node which precedes it before we advance to the next
+ * node. That could certainly happen occasionally, but we'd
+ * have to be pretty unlucky to have it happen even twice in
+ * a row.
+ */
+ target = *pointer_to_target;
+ if (CHashPtrIsMarked(target))
+ goto retry;
+ continue;
+ }
+ }
/*
- * At this point the deletion is done. However, it's possible that
- * someone else did it before we did it, in which case we need to
- * return false rather than true. Also, we may have successfully
- * removed the deleted from the linked list, in which case we need to
- * add it to the garbage list; or that we failed to do so, in which
- * case we need to rescan the list and remove any deleted items we
- * find.
+ * Bucket chains are kept in order, so that there is exactly one legal
+ * point at which any given key can be inserted. The ordering is by
+ * hashcode first, and then by memcmp ordering of the keys involved.
*/
- Assert(!CHashPtrIsMarked(cc));
- if (__sync_bool_compare_and_swap(p, c, cc))
- CHashAddToGarbage(table, bucket, c);
+ h = target_node->un.hashcode;
+ if (h == hashcode)
+ cmp = memcmp(CHashNodeGetItem(target_node), key,
+ table->desc.key_size);
+ else if (h > hashcode)
+ cmp = 1;
else
+ cmp = -1;
+
+ /*
+ * If cmp < 0, then we haven't yet reached the point at which we'd
+ * expect to find the key, so we must continue the scan. If cmp == 0,
+ * we've found the key and can stop. If cmp > 0, we've either passed
+ * the point where we expect to find the key OR someone delete-marked
+ * the item and overwrote the hashcode with a gcnext pointer. In the
+ * latter case we must take care not to be fooled into stopping the
+ * scan early.
+ */
+ if (cmp >= 0)
{
- /* XXX Rescan bucket. */
+ if (cmp == 0)
+ {
+ res->found = true;
+ break;
+ }
+ else
+ {
+ /*
+ * pg_read_barrier() prevents the reread of the next pointer
+ * from being speculated ahead of the read of the hash value.
+ */
+ pg_read_barrier();
+ next = target_node->next;
+ if (CHashPtrIsMarked(next))
+ goto zap;
+ }
}
+
+ /* Continue scan from next node. */
+ pointer_to_target = &target_node->next;
+ target = next;
}
- /* We're done. */
- CHashTableUnsuppressGC();
- return found;
+ /* Send results back to caller. */
+ res->target = target;
+ res->pointer_to_target = pointer_to_target;
+ res->target_node = target_node;
}
/*
n->un.gcnext = f;
} while (!__sync_bool_compare_and_swap(free, f, c));
}
-
-/*
- * Attempt to remove marked elements from a bucket chain.
- *
- * p is a pointer into shared memory; it points to the CHashPtr that must be
- * updated to remove deleted elements from the chain.
- *
- * cp is a pointer into backend-private memory; it is the delete-marked pointer
- * fetched from the node to which p points.
- *
- * The return value is true if the caller must retry, or false if the caller
- * may continue the scan. In the latter case, *cp is updated to contain a
- * pointer to the node from which the scan should be resumed.
- */
-static bool
-CHashRemoveMarked(CHashTable table, uint32 bucket, CHashPtr *cp,
- CHashPtr *p)
-{
- CHashPtr c = *cp;
- CHashPtr cc;
-
- do
- {
- CHashNode *n;
-
- /*
- * c is logically a pointer, so we must insert a dependency barrier
- * before deferencing it.
- */
- pg_read_barrier_depends();
-
- /*
- * Attempt to remove the deleted node from the linked list.
- * If we fail to update the logical pointer, caller must rescan
- * the bucket. There's no intelligent way to continue the scan,
- * because for all we know the node that contains the pointer we're
- * try to update may itself be deleted by now.
- */
- n = CHashTableGetNode(table, c);
- cc = n->next;
- if (!__sync_bool_compare_and_swap(p, c, cc))
- return true;
-
- /* Add c to garbage list. */
- CHashAddToGarbage(table, bucket, c);
-
- /* The new target of the pointer may also be delete-marked, so loop. */
- c = cc;
- } while (CHashPtrIsMarked(c));
-
- /* Success! */
- *cp = c;
- return false;
-}