From 392d5abbca7f97ff9631ea0b4215db31878466ca Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 8 Jun 2012 16:57:06 -0400 Subject: [PATCH] Framework for GiST metapage. This commit doesn't actually create the metapage, but it ought to let everything keep working if one were somehow to spring into existence. We flag the root page with a new GiST flag F_ROOT to simplify things for xlog replay - this way, we don't have to count on being able to read the metapage to identify the root. This commit constitutes a WAL format change for XLOG_GIST_CREATE_INDEX; it now carries a block number as well as a relfilenode. That's the only record that changes, though, because the other record types that need to know which block is the root can simply check for F_ROOT || blkno == 0. --- src/backend/access/gist/gist.c | 19 +++++---- src/backend/access/gist/gistbuild.c | 21 ++++++---- src/backend/access/gist/gistbuildbuffers.c | 2 +- src/backend/access/gist/gistget.c | 4 +- src/backend/access/gist/gistutil.c | 49 ++++++++++++++++++++++ src/backend/access/gist/gistvacuum.c | 25 +++++++---- src/backend/access/gist/gistxlog.c | 35 ++++++++++------ src/include/access/gist.h | 9 ++++ src/include/access/gist_private.h | 34 ++++++++++++++- 9 files changed, 158 insertions(+), 40 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 783590ea55..264ebc41df 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -158,6 +158,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, List **splitinfo, bool markfollowright) { + GistMetaPageData *meta = GistMetaData(rel); Page page = BufferGetPage(buffer); bool is_leaf = (GistPageIsLeaf(page)) ? 
true : false; XLogRecPtr recptr; @@ -202,7 +203,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, BlockNumber blkno = BufferGetBlockNumber(buffer); bool is_rootsplit; - is_rootsplit = (blkno == GIST_ROOT_BLKNO); + is_rootsplit = (blkno == meta->gist_root); /* * Form index tuples vector to split. If we're replacing an old tuple, @@ -287,7 +288,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, for (i = 0, ptr = dist; ptr; ptr = ptr->next) downlinks[i++] = ptr->itup; - rootpg.block.blkno = GIST_ROOT_BLKNO; + rootpg.block.blkno = meta->gist_root; rootpg.block.num = ndownlinks; rootpg.list = gistfillitupvec(downlinks, ndownlinks, &(rootpg.lenlist)); @@ -325,7 +326,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, } /* Set up rightlinks */ - if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO) + if (ptr->next && ptr->block.blkno != meta->gist_root) GistPageGetOpaque(ptr->page)->rightlink = ptr->next->block.blkno; else @@ -475,6 +476,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) { + GistMetaPageData *meta = GistMetaData(r); ItemId iid; IndexTuple idxtuple; GISTInsertStack firststack; @@ -487,7 +489,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) state.r = r; /* Start from the root */ - firststack.blkno = GIST_ROOT_BLKNO; + firststack.blkno = meta->gist_root; firststack.lsn.xrecoff = 0; firststack.parent = NULL; firststack.downlinkoffnum = InvalidOffsetNumber; @@ -542,7 +544,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) continue; } - if (stack->blkno != GIST_ROOT_BLKNO && + if (stack->blkno != meta->gist_root && XLByteLT(stack->parent->lsn, GistPageGetOpaque(stack->page)->nsn)) { @@ -628,7 +630,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) * child pages, so we just need to retry 
from the root * page. */ - if (stack->blkno != GIST_ROOT_BLKNO) + if (stack->blkno != meta->gist_root) { UnlockReleaseBuffer(stack->buffer); xlocked = false; @@ -667,7 +669,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); - if (stack->blkno == GIST_ROOT_BLKNO) + if (stack->blkno == meta->gist_root) { /* * the only page that can become inner instead of leaf is @@ -730,6 +732,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) static GISTInsertStack * gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum) { + GistMetaPageData *meta = GistMetaData(r); Page page; Buffer buffer; OffsetNumber i, @@ -742,7 +745,7 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum) BlockNumber blkno; top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack)); - top->blkno = GIST_ROOT_BLKNO; + top->blkno = meta->gist_root; top->downlinkoffnum = InvalidOffsetNumber; fifo = list_make1(top); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 8caf485676..fef1f22e55 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -180,7 +180,6 @@ gistbuild(PG_FUNCTION_ARGS) /* initialize the root page */ buffer = gistNewBuffer(index); - Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); page = BufferGetPage(buffer); START_CRIT_SECTION(); @@ -193,8 +192,12 @@ gistbuild(PG_FUNCTION_ARGS) { XLogRecPtr recptr; XLogRecData rdata; + gistxlogCreateIndex xlrec; - rdata.data = (char *) &(index->rd_node); + xlrec.node = index->rd_node; + xlrec.blkno = BufferGetBlockNumber(buffer); + + rdata.data = (char *) &xlrec; - rdata.len = sizeof(RelFileNode); + rdata.len = sizeof(gistxlogCreateIndex); /* log the whole record, including blkno */ rdata.buffer = InvalidBuffer; rdata.next = NULL; @@ -684,11 +687,13 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, IndexTuple *itup, int ntup, 
OffsetNumber oldoffnum, BlockNumber parentblk, OffsetNumber downlinkoffnum) { + Relation r = buildstate->indexrel; + GistMetaPageData *meta = GistMetaData(r); GISTBuildBuffers *gfbb = buildstate->gfbb; List *splitinfo; bool is_split; - is_split = gistplacetopage(buildstate->indexrel, + is_split = gistplacetopage(r, buildstate->freespace, buildstate->giststate, buffer, @@ -703,7 +708,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, * nodes up to the root. That simplifies the algorithm to re-find correct * parent. */ - if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO) + if (is_split && BufferGetBlockNumber(buffer) == meta->gist_root) { Page page = BufferGetPage(buffer); OffsetNumber off; @@ -727,7 +732,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, ItemId iid = PageGetItemId(page, off); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); - Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno); + Buffer childbuf = ReadBuffer(r, childblkno); LockBuffer(childbuf, GIST_SHARE); gistMemorizeAllDownlinks(buildstate, childbuf); @@ -737,7 +742,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, * Also remember that the parent of the new child page is the * root block. */ - gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO); + gistMemorizeParent(buildstate, childblkno, meta->gist_root); } } } @@ -773,7 +778,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, */ gistRelocateBuildBuffersOnSplit(gfbb, buildstate->giststate, - buildstate->indexrel, + r, level, buffer, splitinfo); @@ -1057,7 +1062,7 @@ gistGetMaxLevel(Relation index) * level. 
*/ maxLevel = 0; - blkno = GIST_ROOT_BLKNO; + blkno = GistMetaData(index)->gist_root; while (true) { Buffer buffer; diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c index 39aec856f9..7f28b2c6ce 100644 --- a/src/backend/access/gist/gistbuildbuffers.c +++ b/src/backend/access/gist/gistbuildbuffers.c @@ -576,7 +576,7 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, * described by Arge et al did, but it's of no use, as you might as well * read the tuples straight from the heap instead of the root buffer. */ - Assert(blocknum != GIST_ROOT_BLKNO); + Assert(blocknum != GistMetaData(r)->gist_root); memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer)); oldBuf.isTemp = true; diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index c790ad6ffb..dc47cec7cb 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -489,7 +489,7 @@ gistgettuple(PG_FUNCTION_ARGS) so->curTreeItem = NULL; so->curPageData = so->nPageData = 0; - fakeItem.blkno = GIST_ROOT_BLKNO; + fakeItem.blkno = GistMetaData(scan->indexRelation)->gist_root; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); gistScanPage(scan, &fakeItem, NULL, NULL, NULL); } @@ -560,7 +560,7 @@ gistgetbitmap(PG_FUNCTION_ARGS) so->curTreeItem = NULL; so->curPageData = so->nPageData = 0; - fakeItem.blkno = GIST_ROOT_BLKNO; + fakeItem.blkno = GistMetaData(scan->indexRelation)->gist_root; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); gistScanPage(scan, &fakeItem, NULL, tbm, &ntids); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 8039b5d569..dbf56eabc0 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -28,6 +28,55 @@ static Datum attrS[INDEX_MAX_KEYS]; static bool isnullS[INDEX_MAX_KEYS]; +/* + * Fetch local cache of AM-specific info about the index, initializing it + * if necessary + */ 
+GistMetaPageData * +GistMetaData(Relation index) +{ + Buffer metabuffer; + Page metapage; + GistMetaPageData *cache; + + if (index->rd_amcache != NULL) + return (GistMetaPageData *) index->rd_amcache; + + /* Allocate cache memory. */ + cache = MemoryContextAllocZero(index->rd_indexcxt, + sizeof(GistMetaPageData)); + + /* Read the metapage. */ + metabuffer = ReadBuffer(index, GIST_METAPAGE_BLKNO); + LockBuffer(metabuffer, GIST_SHARE); + + /* + * If this index was inherited from PostgreSQL < 9.3 via pg_upgrade, it + * might not have a metapage. In that case, we fake up some suitable + * metapage data. Otherwise, we just copy the data that exists on the + * page. + */ + metapage = BufferGetPage(metabuffer); + if (PageIsRelationMetapage(metapage)) + { + GistMetaPageData *meta; + + meta = GistPageGetMeta(metapage); + memcpy(cache, meta, sizeof(GistMetaPageData)); + } + else + { + cache->gist_magic = GIST_MAGIC; + cache->gist_version = 0; /* no real metapage */ + cache->gist_root = GIST_OLD_ROOT_BLKNO; + } + index->rd_amcache = (char *) cache; + + UnlockReleaseBuffer(metabuffer); + + return cache; +} + /* * Write itup vector to page, has no control of free space. */ diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index f2a7a87266..3558b09ea3 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -35,6 +35,7 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) blkno; BlockNumber totFreePages; bool needLock; + GistMetaPageData *meta = GistMetaData(rel); /* No-op in ANALYZE ONLY mode */ if (info->analyze_only) @@ -67,12 +68,21 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) UnlockRelationForExtension(rel, ExclusiveLock); totFreePages = 0; - for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++) + + /* + * Block 0 might be the metapage, or it might be the root page, both + * of which we want to skip. 
As of PostgreSQL 9.3, the root page might + * also be somewhere else in the index; we must skip it no matter where + * it is. + */ + for (blkno = 1; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); + if (blkno == meta->gist_root) + continue; buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); @@ -109,11 +119,11 @@ typedef struct GistBDItem } GistBDItem; static void -pushStackIfSplited(Page page, GistBDItem *stack) +pushStackIfSplited(GistMetaPageData *meta, Page page, GistBDItem *stack) { GISTPageOpaque opaque = GistPageGetOpaque(page); - if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) && + if (stack->blkno != meta->gist_root && !XLogRecPtrIsInvalid(stack->parentlsn) && (GistFollowRight(page) || XLByteLT(stack->parentlsn, opaque->nsn)) && opaque->rightlink != InvalidBlockNumber /* sanity check */ ) { @@ -147,6 +157,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) Relation rel = info->index; GistBDItem *stack, *ptr; + GistMetaPageData *meta = GistMetaData(rel); /* first time through? 
*/ if (stats == NULL) @@ -156,7 +167,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) stats->num_index_tuples = 0; stack = (GistBDItem *) palloc0(sizeof(GistBDItem)); - stack->blkno = GIST_ROOT_BLKNO; + stack->blkno = meta->gist_root; while (stack) { @@ -182,7 +193,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); - if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page)) + if (stack->blkno == meta->gist_root && !GistPageIsLeaf(page)) { /* only the root can become non-leaf during relock */ UnlockReleaseBuffer(buffer); @@ -194,7 +205,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) * check for split proceeded after look at parent, we should check * it after relock */ - pushStackIfSplited(page, stack); + pushStackIfSplited(meta, page, stack); /* * Remove deletable tuples from page @@ -247,7 +258,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) else { /* check for split proceeded after look at parent */ - pushStackIfSplited(page, stack); + pushStackIfSplited(meta, page, stack); maxoff = PageGetMaxOffsetNumber(page); diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 76029d9949..a6e7591f23 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -140,13 +140,16 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record) GistClearTuplesDeleted(page); } - if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO) - + if (!GistPageIsLeaf(page) && + PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && + (xldata->blkno == GIST_OLD_ROOT_BLKNO || GistPageIsRoot(page))) + { /* * all links on non-leaf root page was deleted by vacuum full, so root * page becomes a leaf */ GistPageSetLeaf(page); + } GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; PageSetLSN(page, lsn); @@ -230,18 +233,26 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record) NewPage *newpage = xlrec.page + i; int flags; - if 
(newpage->header->blkno == GIST_ROOT_BLKNO) + buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true); + Assert(BufferIsValid(buffer)); + page = (Page) BufferGetPage(buffer); + + /* + * If this index was inherited from PostgreSQL < 9.3 via pg_upgrade, + * the root will be at block zero. Otherwise, the metapage will be at + * that offset, and the root page will be flagged as such. + */ + if (newpage->header->blkno == GIST_OLD_ROOT_BLKNO + || GistPageIsRoot(page)) { Assert(i == 0); isrootsplit = true; } - buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true); - Assert(BufferIsValid(buffer)); - page = (Page) BufferGetPage(buffer); - /* ok, clear buffer */ - if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO) + if (i == 0 && isrootsplit) + flags = F_ROOT; /* only the first page of a root split is the root */ + else if (xlrec.data->origleaf) flags = F_LEAF; else flags = 0; @@ -250,7 +261,7 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record) /* and fill it */ gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber); - if (newpage->header->blkno == GIST_ROOT_BLKNO) + if (i == 0 && isrootsplit) { GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; GistPageGetOpaque(page)->nsn = xldata->orignsn; @@ -280,15 +291,15 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record) static void gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) { - RelFileNode *node = (RelFileNode *) XLogRecGetData(record); + gistxlogCreateIndex *xldata = (gistxlogCreateIndex *) XLogRecGetData(record); Buffer buffer; Page page; - buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true); + buffer = XLogReadBuffer(xldata->node, xldata->blkno, true); Assert(BufferIsValid(buffer)); page = (Page) BufferGetPage(buffer); - GISTInitBuffer(buffer, F_LEAF); + GISTInitBuffer(buffer, F_LEAF|F_ROOT); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); diff --git a/src/include/access/gist.h b/src/include/access/gist.h index ed57bb7f93..9f4e2de078 100644 --- a/src/include/access/gist.h +++ 
b/src/include/access/gist.h @@ -62,6 +62,7 @@ #define F_DELETED (1 << 1) /* the page has been deleted */ #define F_TUPLES_DELETED (1 << 2) /* some tuples on the page are dead */ #define F_FOLLOW_RIGHT (1 << 3) /* page to the right has no downlink */ +#define F_ROOT (1 << 4) /* root page - not set prior to 9.3 */ typedef XLogRecPtr GistNSN; @@ -137,6 +138,14 @@ typedef struct GISTENTRY #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT) #define GistClearFollowRight(page) ( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT) +/* + * Prior to PostgreSQL 9.3, the F_ROOT flag did not exist; the root page was + * at a fixed location, GIST_OLD_ROOT_BLKNO. This block now contains the + * metapage. The correct test for whether you've got the root block is + * therefore blkno == 0 || GistPageIsRoot(page). + */ +#define GistPageIsRoot(page) ( GistPageGetOpaque(page)->flags & F_ROOT) + /* * Vector of GISTENTRY structs; user-defined methods union and picksplit * take it as one of their arguments diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 9af9a0cf8c..142a30ec64 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -16,6 +16,7 @@ #include "access/gist.h" #include "access/itup.h" +#include "access/metapage.h" #include "fmgr.h" #include "storage/bufmgr.h" #include "storage/buffile.h" @@ -211,6 +212,12 @@ typedef struct gistxlogPage int num; /* number of index tuples following */ } gistxlogPage; +typedef struct gistxlogCreateIndex +{ + RelFileNode node; + BlockNumber blkno; +} gistxlogCreateIndex; + typedef struct gistxlogPageDelete { RelFileNode node; @@ -278,8 +285,12 @@ typedef struct GISTInsertStack *stack; } GISTInsertState; -/* root page of a gist index */ -#define GIST_ROOT_BLKNO 0 +/* + * Before PostgreSQL 9.3, the root block was always at block 0. Now, the + * metapage is at block 0, and it contains the location of the root block. 
+ */ +#define GIST_OLD_ROOT_BLKNO 0 +#define GIST_METAPAGE_BLKNO 0 /* * Before PostgreSQL 9.1, we used rely on so-called "invalid tuples" on inner @@ -461,12 +472,31 @@ extern Datum gistgetbitmap(PG_FUNCTION_ARGS); /* gistutil.c */ +#define GIST_MAGIC 0x23c1eb95 +#define GIST_VERSION 1 + +typedef struct GistMetaPageData +{ + uint32 gist_magic; /* should contain GIST_MAGIC */ + uint32 gist_version; /* should contain GIST_VERSION */ + BlockNumber gist_root; /* current root location */ +} GistMetaPageData; + +/* + * Prior to PostgreSQL 9.3, GiST indexes had no metapage. Thus, this should + * only ever get called on a relation metapage. + */ +#define GistPageGetMeta(p) \ + (AssertMacro(PageIsRelationMetapage(p)), \ + (GistMetaPageData *) BlindGetAccessMethodMeta(p)) + #define GiSTPageSize \ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) ) #define GIST_MIN_FILLFACTOR 10 #define GIST_DEFAULT_FILLFACTOR 90 +extern GistMetaPageData *GistMetaData(Relation index); extern Datum gistoptions(PG_FUNCTION_ARGS); extern bool gistfitpage(IndexTuple *itvec, int len); extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace); -- 2.39.5