Phase 1 of fix for 'SMgrRelation hashtable corrupted' problem. This
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Jan 2005 20:02:24 +0000 (20:02 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Jan 2005 20:02:24 +0000 (20:02 +0000)
is the minimum required fix.  I want to look next at taking advantage of
it by simplifying the message semantics in the shared inval message queue,
but that part can be held over for 8.1 if it turns out too ugly.

14 files changed:
src/backend/access/transam/xlogutils.c
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/commands/tablecmds.c
src/backend/postmaster/bgwriter.c
src/backend/rewrite/rewriteDefine.c
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/localbuf.c
src/backend/storage/smgr/smgr.c
src/backend/utils/cache/inval.c
src/backend/utils/cache/relcache.c
src/include/storage/smgr.h
src/include/utils/rel.h
src/include/utils/relcache.h

index efc40be75968d87e9e5112367d75880dcb838429..3a92cdc9e695589559e92cb992d4c978287570ea 100644 (file)
@@ -125,8 +125,7 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc)
        if (hentry == NULL)
                elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
 
-       if (rdesc->reldata.rd_smgr != NULL)
-               smgrclose(rdesc->reldata.rd_smgr);
+       RelationCloseSmgr(&(rdesc->reldata));
 
        memset(rdesc, 0, sizeof(XLogRelDesc));
        memset(tpgc, 0, sizeof(FormData_pg_class));
@@ -233,7 +232,8 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
                hentry->rdesc = res;
 
                res->reldata.rd_targblock = InvalidBlockNumber;
-               res->reldata.rd_smgr = smgropen(res->reldata.rd_node);
+               res->reldata.rd_smgr = NULL;
+               RelationOpenSmgr(&(res->reldata));
 
                /*
                 * Create the target file if it doesn't already exist.  This lets
@@ -278,7 +278,5 @@ XLogCloseRelation(RelFileNode rnode)
 
        rdesc = hentry->rdesc;
 
-       if (rdesc->reldata.rd_smgr != NULL)
-               smgrclose(rdesc->reldata.rd_smgr);
-       rdesc->reldata.rd_smgr = NULL;
+       RelationCloseSmgr(&(rdesc->reldata));
 }
index 80c2e929ef5059d0ecc30b9469041a2b4bdb7e70..3d6c80390f9213bbecd631f6571662a7b05ab22c 100644 (file)
@@ -322,7 +322,7 @@ heap_create(const char *relname,
        if (create_storage)
        {
                Assert(rel->rd_smgr == NULL);
-               rel->rd_smgr = smgropen(rel->rd_node);
+               RelationOpenSmgr(rel);
                smgrcreate(rel->rd_smgr, rel->rd_istemp, false);
        }
 
@@ -1186,10 +1186,8 @@ heap_drop_with_catalog(Oid relid)
        if (rel->rd_rel->relkind != RELKIND_VIEW &&
                rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
        {
-               if (rel->rd_smgr == NULL)
-                       rel->rd_smgr = smgropen(rel->rd_node);
+               RelationOpenSmgr(rel);
                smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
-               rel->rd_smgr = NULL;
        }
 
        /*
index 711e0f8784edf1fbedf9c9381fa77a27a1e36553..6c2aa723525da0e0968413eb247849540d358edb 100644 (file)
@@ -780,11 +780,9 @@ index_drop(Oid indexId)
         */
        FlushRelationBuffers(userIndexRelation, (BlockNumber) 0);
 
-       if (userIndexRelation->rd_smgr == NULL)
-               userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node);
+       RelationOpenSmgr(userIndexRelation);
        smgrscheduleunlink(userIndexRelation->rd_smgr,
                                           userIndexRelation->rd_istemp);
-       userIndexRelation->rd_smgr = NULL;
 
        /*
         * Close and flush the index's relcache entry, to ensure relcache
@@ -1133,10 +1131,8 @@ setNewRelfilenode(Relation relation)
        smgrclose(srel);
 
        /* schedule unlinking old relfilenode */
-       if (relation->rd_smgr == NULL)
-               relation->rd_smgr = smgropen(relation->rd_node);
+       RelationOpenSmgr(relation);
        smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
-       relation->rd_smgr = NULL;
 
        /* update the pg_class row */
        rd_rel->relfilenode = newrelfilenode;
index cb555223760edb64c9eaca4261bd2d1f3c87ef70..1c1620d75a7152006619e84080489e336d459b09 100644 (file)
@@ -5521,10 +5521,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
        copy_relation_data(rel, dstrel);
 
        /* schedule unlinking old physical file */
-       if (rel->rd_smgr == NULL)
-               rel->rd_smgr = smgropen(rel->rd_node);
+       RelationOpenSmgr(rel);
        smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
-       rel->rd_smgr = NULL;
 
        /*
         * Now drop smgr references.  The source was already dropped by
index fd2942742369d41792f2066144e459e392b5b7bd..307e4d15b3bdcc2b2c9ffc21a999dd6d37db3d63 100644 (file)
@@ -349,9 +349,7 @@ BackgroundWriterMain(void)
                        /*
                         * After any checkpoint, close all smgr files.  This is so we
                         * won't hang onto smgr references to deleted files
-                        * indefinitely. (It is safe to do this because this process
-                        * does not have a relcache, and so no dangling references
-                        * could remain.)
+                        * indefinitely.
                         */
                        smgrcloseall();
 
index c4ab2e32f5ae346930a1e99a8d61742d15f1495a..5d1d6a3ecbe71bca09db916bf5007facf3a72326 100644 (file)
@@ -484,10 +484,8 @@ DefineQueryRewrite(RuleStmt *stmt)
         */
        if (RelisBecomingView)
        {
-               if (event_relation->rd_smgr == NULL)
-                       event_relation->rd_smgr = smgropen(event_relation->rd_node);
+               RelationOpenSmgr(event_relation);
                smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp);
-               event_relation->rd_smgr = NULL;
        }
 
        /* Close rel, but keep lock till commit... */
index 1cf66a6e4bed789cbd93b1c2f803450bf9a992e8..629e2f8f65964930e23635a21a6c82267f878126 100644 (file)
@@ -131,8 +131,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
        isLocalBuf = reln->rd_istemp;
 
        /* Open it at the smgr level if not already done */
-       if (reln->rd_smgr == NULL)
-               reln->rd_smgr = smgropen(reln->rd_node);
+       RelationOpenSmgr(reln);
 
        /* Substitute proper block number if caller asked for P_NEW */
        if (isExtend)
@@ -1130,8 +1129,7 @@ BlockNumber
 RelationGetNumberOfBlocks(Relation relation)
 {
        /* Open it at the smgr level if not already done */
-       if (relation->rd_smgr == NULL)
-               relation->rd_smgr = smgropen(relation->rd_node);
+       RelationOpenSmgr(relation);
 
        return smgrnblocks(relation->rd_smgr);
 }
@@ -1147,8 +1145,7 @@ void
 RelationTruncate(Relation rel, BlockNumber nblocks)
 {
        /* Open it at the smgr level if not already done */
-       if (rel->rd_smgr == NULL)
-               rel->rd_smgr = smgropen(rel->rd_node);
+       RelationOpenSmgr(rel);
 
        /* Make sure rd_targblock isn't pointing somewhere past end */
        rel->rd_targblock = InvalidBlockNumber;
@@ -1445,8 +1442,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
        BufferDesc *bufHdr;
 
        /* Open rel at the smgr level if not already done */
-       if (rel->rd_smgr == NULL)
-               rel->rd_smgr = smgropen(rel->rd_node);
+       RelationOpenSmgr(rel);
 
        if (rel->rd_istemp)
        {
index ab1ae2ff495e63d14fb50a01a118c9ef4b6d9f22..b466f0949dc13ac0b766c7515e1932f0f36444b5 100644 (file)
@@ -108,13 +108,13 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
         */
        if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
        {
-               SMgrRelation reln;
+               SMgrRelation oreln;
 
                /* Find smgr relation for buffer */
-               reln = smgropen(bufHdr->tag.rnode);
+               oreln = smgropen(bufHdr->tag.rnode);
 
                /* And write... */
-               smgrwrite(reln,
+               smgrwrite(oreln,
                                  bufHdr->tag.blockNum,
                                  (char *) MAKE_PTR(bufHdr->data),
                                  true);
index ba1a5743326bd942369278b519f6a07762946604..7da9cd7b299ab0e294b5b063d1c8d7a207e28eca 100644 (file)
@@ -216,6 +216,7 @@ smgropen(RelFileNode rnode)
        if (!found)
        {
                /* hash_search already filled in the lookup key */
+               reln->smgr_owner = NULL;
                reln->smgr_which = 0;   /* we only have md.c at present */
                reln->md_fd = NULL;             /* mark it not open */
        }
@@ -224,15 +225,36 @@ smgropen(RelFileNode rnode)
 }
 
 /*
- *     smgrclose() -- Close and delete an SMgrRelation object.
+ * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
  *
- * It is the caller's responsibility not to leave any dangling references
- * to the object.  (Pointers should be cleared after successful return;
- * on the off chance of failure, the SMgrRelation object will still exist.)
+ * There can be only one owner at a time; this is sufficient since currently
+ * the only such owners exist in the relcache.
+ */
+void
+smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
+{
+       /*
+        * First, unhook any old owner.  (Normally there shouldn't be any, but
+        * it seems possible that this can happen during swap_relation_files()
+        * depending on the order of processing.  It's ok to close the old
+        * relcache entry early in that case.)
+        */
+       if (reln->smgr_owner)
+               *(reln->smgr_owner) = NULL;
+
+       /* Now establish the ownership relationship. */
+       reln->smgr_owner = owner;
+       *owner = reln;
+}
+
+/*
+ *     smgrclose() -- Close and delete an SMgrRelation object.
  */
 void
 smgrclose(SMgrRelation reln)
 {
+       SMgrRelation *owner;
+
        if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln))
                ereport(ERROR,
                                (errcode_for_file_access(),
@@ -241,16 +263,24 @@ smgrclose(SMgrRelation reln)
                                                reln->smgr_rnode.dbNode,
                                                reln->smgr_rnode.relNode)));
 
+       owner = reln->smgr_owner;
+
        if (hash_search(SMgrRelationHash,
                                        (void *) &(reln->smgr_rnode),
                                        HASH_REMOVE, NULL) == NULL)
                elog(ERROR, "SMgrRelation hashtable corrupted");
+
+       /*
+        * Unhook the owner pointer, if any.  We do this last since in the
+        * remote possibility of failure above, the SMgrRelation object will still
+        * exist.
+        */
+       if (owner)
+               *owner = NULL;
 }
 
 /*
  *     smgrcloseall() -- Close all existing SMgrRelation objects.
- *
- * It is the caller's responsibility not to leave any dangling references.
  */
 void
 smgrcloseall(void)
@@ -275,8 +305,6 @@ smgrcloseall(void)
  * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
  * uselessly creating a hashtable entry only to drop it again when no
  * such entry exists already.
- *
- * It is the caller's responsibility not to leave any dangling references.
  */
 void
 smgrclosenode(RelFileNode rnode)
index 689da61d5a96b948460f7dd82894b3b582ed77b1..5b9c3c23ce39fa56dcd1c335ac2a8e3efa8b464a 100644 (file)
@@ -414,17 +414,9 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
        }
        else if (msg->id == SHAREDINVALRELCACHE_ID)
        {
-               /*
-                * If the message includes a valid relfilenode, we must ensure
-                * that smgr cache entry gets zapped.  The relcache will handle
-                * this if called, otherwise we must do it directly.
-                */
                if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
                {
-                       if (OidIsValid(msg->rc.physId.relNode))
-                               RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId);
-                       else
-                               RelationCacheInvalidateEntry(msg->rc.relId, NULL);
+                       RelationCacheInvalidateEntry(msg->rc.relId);
 
                        for (i = 0; i < cache_callback_count; i++)
                        {
@@ -434,12 +426,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
                                        (*ccitem->function) (ccitem->arg, msg->rc.relId);
                        }
                }
-               else
-               {
-                       /* might have smgr entry even if not in our database */
-                       if (OidIsValid(msg->rc.physId.relNode))
-                               smgrclosenode(msg->rc.physId);
-               }
+               /*
+                * If the message includes a valid relfilenode, we must ensure
+                * the smgr cache entry gets zapped.  This might not have happened
+                * above since the relcache entry might not have existed or might
+                * have been associated with a different relfilenode.
+                *
+                * XXX there is no real good reason for rnode inval to be in the
+                * same message at all.  FIXME in 8.1.
+                */
+               if (OidIsValid(msg->rc.physId.relNode))
+                       smgrclosenode(msg->rc.physId);
        }
        else
                elog(FATAL, "unrecognized SI message id: %d", msg->id);
index 10e6bfa4d614978f83ff1a40a078f41e5b554caf..41870a2fb257572f175af2444df11532d87eb941 100644 (file)
@@ -1659,11 +1659,7 @@ RelationClearRelation(Relation relation, bool rebuild)
         * ensures that the low-level file access state is updated after, say,
         * a vacuum truncation.
         */
-       if (relation->rd_smgr)
-       {
-               smgrclose(relation->rd_smgr);
-               relation->rd_smgr = NULL;
-       }
+       RelationCloseSmgr(relation);
 
        /*
         * Never, never ever blow away a nailed-in system relation, because
@@ -1857,16 +1853,7 @@ RelationForgetRelation(Oid rid)
  *
  * Any relcache entry matching the relid must be flushed.  (Note: caller has
  * already determined that the relid belongs to our database or is a shared
- * relation.)  If rnode isn't NULL, we must also ensure that any smgr cache
- * entry matching that rnode is flushed.
- *
- * Ordinarily, if rnode is supplied then it will match the relfilenode of
- * the target relid.  However, it's possible for rnode to be different if
- * someone is engaged in a relfilenode change. In that case we want to
- * make sure we clear the right cache entries. This has to be done here
- * to keep things in sync between relcache and smgr cache --- we can't have
- * someone flushing an smgr cache entry that a relcache entry still points
- * to.
+ * relation.)
  *
  * We used to skip local relations, on the grounds that they could
  * not be targets of cross-backend SI update messages; but it seems
@@ -1875,7 +1862,7 @@ RelationForgetRelation(Oid rid)
  * local and nonlocal relations.
  */
 void
-RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode)
+RelationCacheInvalidateEntry(Oid relationId)
 {
        Relation        relation;
 
@@ -1884,20 +1871,8 @@ RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode)
        if (PointerIsValid(relation))
        {
                relcacheInvalsReceived++;
-               if (rnode)
-               {
-                       /* Need to be sure smgr is flushed, but don't do it twice */
-                       if (relation->rd_smgr == NULL ||
-                               !RelFileNodeEquals(*rnode, relation->rd_node))
-                               smgrclosenode(*rnode);
-               }
                RelationFlushRelation(relation);
        }
-       else
-       {
-               if (rnode)
-                       smgrclosenode(*rnode);
-       }
 }
 
 /*
@@ -1946,11 +1921,7 @@ RelationCacheInvalidate(void)
                relation = idhentry->reldesc;
 
                /* Must close all smgr references to avoid leaving dangling ptrs */
-               if (relation->rd_smgr)
-               {
-                       smgrclose(relation->rd_smgr);
-                       relation->rd_smgr = NULL;
-               }
+               RelationCloseSmgr(relation);
 
                /* Ignore new relations, since they are never SI targets */
                if (relation->rd_createSubid != InvalidSubTransactionId)
index daf0b50f5d99f59dc7b74eae19b0e25fd14b2e43..5c79214f5de734a5ecc43aa07a0b0b0ebcaa4fc4 100644 (file)
  * operations imply I/O, they just create or destroy a hashtable entry.
  * (But smgrclose() may release associated resources, such as OS-level file
  * descriptors.)
+ *
+ * An SMgrRelation may have an "owner", which is just a pointer to it from
+ * somewhere else; smgr.c will clear this pointer if the SMgrRelation is
+ * closed.  We use this to avoid dangling pointers from relcache to smgr
+ * without having to make the smgr explicitly aware of relcache.  There
+ * can't be more than one "owner" pointer per SMgrRelation, but that's
+ * all we need.
  */
 typedef struct SMgrRelationData
 {
        /* rnode is the hashtable lookup key, so it must be first! */
        RelFileNode smgr_rnode;         /* relation physical identifier */
 
+       /* pointer to owning pointer, or NULL if none */
+       struct SMgrRelationData **smgr_owner;
+
        /* additional public fields may someday exist here */
 
        /*
@@ -49,6 +59,7 @@ typedef SMgrRelationData *SMgrRelation;
 
 extern void smgrinit(void);
 extern SMgrRelation smgropen(RelFileNode rnode);
+extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
 extern void smgrclose(SMgrRelation reln);
 extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNode rnode);
index a68ff76b8974efad0d468f142b8a00bf1e7f9de2..f93a535991691a6ea5a771c2f2068fa0772bfc82 100644 (file)
@@ -233,6 +233,31 @@ typedef Relation *RelationPtr;
 #define RelationGetNamespace(relation) \
        ((relation)->rd_rel->relnamespace)
 
+/*
+ * RelationOpenSmgr
+ *             Open the relation at the smgr level, if not already done.
+ */
+#define RelationOpenSmgr(relation) \
+       do { \
+               if ((relation)->rd_smgr == NULL) \
+                       smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node)); \
+       } while (0)
+
+/*
+ * RelationCloseSmgr
+ *             Close the relation at the smgr level, if not already done.
+ *
+ * Note: smgrclose should unhook from owner pointer, hence the Assert.
+ */
+#define RelationCloseSmgr(relation) \
+       do { \
+               if ((relation)->rd_smgr != NULL) \
+               { \
+                       smgrclose((relation)->rd_smgr); \
+                       Assert((relation)->rd_smgr == NULL); \
+               } \
+       } while (0)
+
 /*
  * RELATION_IS_LOCAL
  *             If a rel is either temp or newly created in the current transaction,
index 40ec1f3844b448ac7e091e0aad09a1ecf44ea19e..0305b3171ee170d2e04e37236569786c5f56f06d 100644 (file)
@@ -61,7 +61,7 @@ extern Relation RelationBuildLocalRelation(const char *relname,
  */
 extern void RelationForgetRelation(Oid rid);
 
-extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode);
+extern void RelationCacheInvalidateEntry(Oid relationId);
 
 extern void RelationCacheInvalidate(void);