From 58e8423cc9daf5854f45e4dc1aa1f5c9f6afdd73 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 13 Jul 2012 16:25:35 -0400 Subject: [PATCH] First attempt at CHashSearch. --- src/backend/utils/hash/chash.c | 115 +++++++++++++++++++++++++++++++-- src/include/storage/proc.h | 3 + 2 files changed, 113 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c index 6f1effd839..3d9adca098 100644 --- a/src/backend/utils/hash/chash.c +++ b/src/backend/utils/hash/chash.c @@ -81,6 +81,9 @@ #include "postgres.h" #include "miscadmin.h" +#include "access/hash.h" +#include "storage/barrier.h" +#include "storage/proc.h" #include "storage/shmem.h" #include "storage/spin.h" #include "utils/chash.h" @@ -91,8 +94,7 @@ * used to implement concurrent deletion. */ typedef uint32 CHashPtr; -#define InvalidCHashPtr ((uint32) -1) -#define ReclaimCHashPtr ((uint32) -2) +#define InvalidCHashPtr ((uint32) -2) #define CHashPtrIsMarked(x) ((x) & 1) #define CHashPtrGetOffset(x) ((x) >> 1) #define CHashPtrMark(x) ((x) | 1) @@ -118,11 +120,11 @@ typedef struct typedef struct { CHashPtr next; /* arena offset of next element */ - uint32 hash_value; /* hash(key) */ + uint32 hashcode; /* hash(key) */ } CHashNode; #define SizeOfCHashNode MAXALIGN(sizeof(CHashNode)) -#define CHashNodeGetItem(x) ((void *) (((char *) x) + SizeOfCHashNode)) +#define CHashNodeGetItem(x) (((char *) x) + SizeOfCHashNode) /* * CHashTableData stores all the information that we need in order to access @@ -144,9 +146,13 @@ typedef struct CHashTableData CHashBucket *bucket; /* array of size nbuckets */ CHashBucket *garbage; /* array of size ngarbage */ CHashBucket *freelist; /* array of size nfreelists */ - void *arena; /* arena */ + char *arena; /* arena */ } CHashTableData; +#define CHashTableGetNode(table, offset) \ + (AssertMacro((offset) < (table)->arena_limit), \ + (CHashNode *) ((table)->arena + (table)->arena_stride * (offset))) + /* * First stage of CHashTable initialization. We fill in all the constants * here, but not the pointers. @@ -162,6 +168,8 @@ CHashBootstrap(CHashDescriptor *desc) memcpy(&table->desc, desc, sizeof(CHashDescriptor)); /* Sanity checks. */ + if (desc->id == 0) + elog(ERROR, "concurrent hash table id must not be zero"); if (desc->capacity < 1 || desc->capacity > CHashMaxCapacity) elog(ERROR, "invalid capacity for concurrent hash"); if (desc->key_size < 1 || desc->key_size > desc->element_size) @@ -278,6 +286,9 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc) /* Arena follows the various lists. */ table->arena = (void *) (&table->freelist[table->nfreelists]); + /* XXX. Must initialize spinlocks, set lists to empty, and then put + * all arena nodes on free lists. */ + /* * Copy table (with pointers now filled in) to shared memory. This is * arguably unnecessary when not using EXEC_BACKEND, but we do it anyway. @@ -286,3 +297,97 @@ CHashInitialize(CHashTable table, CHashDescriptor *desc) return table; } + +/* + * Search a concurrent hash table. entry should be a block of memory large + * enough to hold a complete entry, with just the key portion filled in. If + * a matching entry is found, this function will fill in the rest of the entry + * from the data in the hash table and return true. If not, it will return + * false. + */ +bool +CHashSearch(CHashTable table, void *entry) +{ + uint32 hashcode = hash_any(entry, table->desc.key_size); + uint32 bucket = hashcode & table->bucket_mask; + CHashPtr c; + CHashNode *n; + bool found = false; + + /* + * Suppress garbage collection for target bucket. We need a memory + * barrier to make sure that our bucket advertisement is committed to + * memory before we begin scanning the bucket. + */ + MyProc->chash_bucket = ((uint64) table->desc.id) << 32 | bucket; + pg_memory_barrier(); + + /* Scan bucket. */ + c = table->bucket[bucket].head; + for (;;) + { + int cmp; + + /* If we've reached the end of the bucket chain, stop. */ + if (c == InvalidCHashPtr) + break; + + /* + * A dependency barrier is needed after reading a pointer value and + * before dereferencing it. c is, in effect, a pointer which we're + * about to deference. + */ + pg_read_barrier_depends(); + + /* Compare current node by hashcode, then by memcmp. */ + n = CHashTableGetNode(table, c); + if (n->hashcode == hashcode) + cmp = memcmp(CHashNodeGetItem(n), entry, table->desc.key_size); + else if (n->hashcode > hashcode) + cmp = 1; + else + cmp = -1; + + /* Stop if we've passed the list position where the entry should be. */ + if (cmp > 0) + break; + + /* Fetch next-pointer. */ + c = n->next; + + /* Is it the item we're looking for? */ + if (cmp == 0) + { + /* + * If the pointer is marked, it will be followed (if at all) by + * a node which is "later" than this one in terms of either + * hashcode or memcmp ordering; we won't find a duplicate of the + * current key. This is because a marked CHashPtr is never + * further updated (or at least, not until after garbage + * collection, which we've already prevented for this bucket), + * so the successor element must have been present before this + * one was deleted. + */ + if (!CHashPtrIsMarked(c)) + { + memcpy(((char *) entry) + table->desc.key_size, + CHashNodeGetItem(n) + table->desc.key_size, + table->desc.element_size - table->desc.key_size); + found = true; + } + break; + } + } + + /* + * Once we're done scanning the bucket, we can permit garbage collection + * of that bucket to resume. The memory barrier ensures that all the reads + * that are part of the bucket scan finish before we allow garbage + * collection. + */ + pg_memory_barrier(); + MyProc->chash_bucket = 0; + + /* Return result to caller. */ + return found; +} diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index d194f38b46..af236df219 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -143,6 +143,9 @@ struct PGPROC bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID * lock */ + + /* chash bucket currently being scanned. */ + uint64 chash_bucket; }; /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ -- 2.39.5