Repair very-low-probability race condition between relation extension

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c

index aa46ac44080db9271947f7c5f4db20d40c833f7b..93da4f23b39c09aa134aee2d5cad2b7df60164e7 100644 (file)
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -246,13 +246,6 @@ RelationGetBufferForTuple(Relation relation, Size len,
          */
         buffer = ReadBuffer(relation, P_NEW);
  
-       /*
-        * Release the file-extension lock; it's now OK for someone else to
-        * extend the relation some more.
-        */
-       if (needLock)
-               UnlockPage(relation, 0, ExclusiveLock);
-
         /*
          * We can be certain that locking the otherBuffer first is OK, since
          * it must have a lower page number.
@@ -261,9 +254,22 @@ RelationGetBufferForTuple(Relation relation, Size len,
                 LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
  
         /*
-        * We need to initialize the empty new page.
+        * Now acquire lock on the new page.
          */
         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+       /*
+        * Release the file-extension lock; it's now OK for someone else to
+        * extend the relation some more.  Note that we cannot release this
+        * lock before we have buffer lock on the new page, or we risk a
+        * race condition against vacuumlazy.c --- see comments therein.
+        */
+       if (needLock)
+               UnlockPage(relation, 0, ExclusiveLock);
+
+       /*
+        * We need to initialize the empty new page.
+        */
         pageHeader = (Page) BufferGetPage(buffer);
         Assert(PageIsNew((PageHeader) pageHeader));
         PageInit(pageHeader, BufferGetPageSize(buffer), 0);
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c

index e1d8d2e87616e5b829834e5f5fd08d4875c060aa..79710e8985487633d9c8171634b4c924525625b4 100644 (file)
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -473,18 +473,21 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
  
                 buf = ReadBuffer(rel, P_NEW);
  
+               /* Acquire buffer lock on new page */
+               LockBuffer(buf, BT_WRITE);
+
                 /*
-                * Release the file-extension lock; it's now OK for someone else
-                * to extend the relation some more.
+                * Release the file-extension lock; it's now OK for someone else to
+                * extend the relation some more.  Note that we cannot release this
+                * lock before we have buffer lock on the new page, or we risk a
+                * race condition against btvacuumcleanup --- see comments therein.
                  */
                 if (needLock)
                         UnlockPage(rel, 0, ExclusiveLock);
  
-               /* Acquire appropriate buffer lock on new page */
-               LockBuffer(buf, access);
-
                 /* Initialize the new page before returning it */
                 page = BufferGetPage(buf);
+               Assert(PageIsNew((PageHeader) page));
                 _bt_pageinit(page, BufferGetPageSize(buf));
         }
  
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 14516d263e2ca5b91e237b7764653dafc6465e07..7f9853f2711ca750579e4d9159e1fa3d857278e9 100644 (file)
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -707,11 +707,35 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
         BlockNumber pages_deleted = 0;
         MemoryContext mycontext;
         MemoryContext oldcontext;
+       bool            needLock;
  
         Assert(stats != NULL);
  
+       /*
+        * First find out the number of pages in the index.  We must acquire
+        * the relation-extension lock while doing this to avoid a race
+        * condition: if someone else is extending the relation, there is
+        * a window where bufmgr/smgr have created a new all-zero page but
+        * it hasn't yet been write-locked by _bt_getbuf().  If we manage to
+        * scan such a page here, we'll improperly assume it can be recycled.
+        * Taking the lock synchronizes things enough to prevent a problem:
+        * either num_pages won't include the new page, or _bt_getbuf already
+        * has write lock on the buffer and it will be fully initialized before
+        * we can examine it.  (See also vacuumlazy.c, which has the same issue.)
+        *
+        * We can skip locking for new or temp relations,
+        * however, since no one else could be accessing them.
+        */
+       needLock = !(rel->rd_isnew || rel->rd_istemp);
+
+       if (needLock)
+               LockPage(rel, 0, ExclusiveLock);
+
         num_pages = RelationGetNumberOfBlocks(rel);
  
+       if (needLock)
+               UnlockPage(rel, 0, ExclusiveLock);
+
         /* No point in remembering more than MaxFSMPages pages */
         maxFreePages = MaxFSMPages;
         if ((BlockNumber) maxFreePages > num_pages)
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c

index ba2408bb3abffc2165b3c9d89a402973d161e00e..25d7e8e2da012ff87f98fe7ee58947ab01a2b42d 100644 (file)
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -254,8 +254,30 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
  
                 if (PageIsNew(page))
                 {
-                       /* Not sure we still need to handle this case, but... */
+                       /*
+                        * An all-zeroes page could be left over if a backend extends
+                        * the relation but crashes before initializing the page.
+                        * Reclaim such pages for use.
+                        *
+                        * We have to be careful here because we could be looking at
+                        * a page that someone has just added to the relation and not
+                        * yet been able to initialize (see RelationGetBufferForTuple).
+                        * To interlock against that, release the buffer read lock
+                        * (which we must do anyway) and grab the relation extension
+                        * lock before re-locking in exclusive mode.  If the page is
+                        * still uninitialized by then, it must be left over from a
+                        * crashed backend, and we can initialize it.
+                        *
+                        * We don't really need the relation lock when this is a new
+                        * or temp relation, but it's probably not worth the code space
+                        * to check that, since this surely isn't a critical path.
+                        *
+                        * Note: the comparable code in vacuum.c need not do all this
+                        * because it's got exclusive lock on the whole relation.
+                        */
                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+                       LockPage(onerel, 0, ExclusiveLock);
+                       UnlockPage(onerel, 0, ExclusiveLock);
                         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                         if (PageIsNew(page))
                         {
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 7 May 2005 21:33:21 +0000 (21:33 +0000)
src/backend/access/heap/hio.c		patch \| blob \| blame \| history
src/backend/access/nbtree/nbtpage.c		patch \| blob \| blame \| history
src/backend/access/nbtree/nbtree.c		patch \| blob \| blame \| history
src/backend/commands/vacuumlazy.c		patch \| blob \| blame \| history