More hacking.
authorRobert Haas <rhaas@postgresql.org>
Wed, 19 Feb 2014 20:47:18 +0000 (15:47 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 19 Feb 2014 20:47:18 +0000 (15:47 -0500)
src/backend/utils/mmgr/freepage.c

index e6a30a1b8cbcae0cda6cd431b2a93055e422da10..63a46e05faabe2d0b4d6c06cd43daf8a252264d9 100644 (file)
@@ -83,11 +83,11 @@ typedef struct FreePageBtreeSearchResult
 } FreePageBtreeSearchResult;
 
 /* Helper functions */
+static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
+                                       FreePageBtree *btp);
 static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
                                                   Size *first_page);
 static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
-static void FreePageBtreeReduceAncestorKeys(FreePageManager *fpm,
-                                       FreePageBtree *btp);
 static void FreePageBtreePageRemove(FreePageBtree *btp, Size index);
 static bool FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
                                                   Size npages, bool soft);
@@ -194,6 +194,86 @@ FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
                LWLockRelease(lock);
 }
 
+/*
+ * The first_page value stored it index zero in any non-root page must match
+ * the first_page value stored in its parent at the index which points to that
+ * page.  So when the value stored at index zero in a leaf page changes, we've
+ * got to walk up the tree adjusting ancestor keys until we reach an ancestor
+ * where that key isn't index zero.  This function should be called after
+ * updating the first key on the leaf page; it will propagate the change
+ * upward as far as needed.
+ *
+ * We assume here that the first key on the page has not changed enough to
+ * require changes in the ordering of keys on its ancestor pages.  Thus,
+ * if we search the parent page for the first key greater than or equal to
+ * the first key on the current page, the downlink to this page will be either
+ * the exact index returned by the search (if the first key decreased)
+ * or one less (if the first key increased).
+ */
+static void
+FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
+{
+       char *base = fpm_segment_base(fpm);
+       Size    first_page;
+       FreePageBtree *parent;
+       FreePageBtree *child;
+
+       Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
+       Assert(btp->hdr.nused > 0 && btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
+       first_page = btp->u.leaf_key[0].first_page;
+       child = btp;
+
+       for (;;)
+       {
+               Size    s;
+
+               parent = relptr_access(base, child->hdr.parent);
+               if (parent == NULL)
+                       break;
+               s = FreePageBtreeSearchInternal(parent, first_page);
+
+               /* Key is either at index s or index s-1; figure out which. */
+               if (s >= parent->hdr.nused)
+               {
+                       Assert(s == parent->hdr.nused);
+                       --s;
+               }
+               else
+               {
+                       FreePageBtree *check;
+
+                       check = relptr_access(base, parent->u.internal_key[s].child);
+                       if (check != child)
+                       {
+                               Assert(s > 0);
+                               --s;
+                       }
+               }
+
+#ifdef USE_ASSERT_CHECKING
+               /* Debugging double-check. */
+               if (assert_enabled)
+               {
+                       FreePageBtree *check;
+
+                       Assert(s < parent->hdr.nused);
+                       Assert(child == check);
+               }
+#endif
+
+               /* Update the parent key. */
+               parent->u.internal_key[s].first_page = first_page;
+
+               /*
+                * If this is the first key in the parent, go up another level;
+                * else done.
+                */
+               if (s > 0)
+                       break;
+               child = parent;
+       }
+}
+
 /*
  * Like FreePageManagerGet, this function allocates a run of pages of the
  * given length from the free page manager, but without taking and releasing
@@ -319,55 +399,6 @@ FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
        relptr_store(base, fpm->btree_recycle, span);
 }
 
-/*
- * When the first key on a leaf page is reduced, the first_page value stored
- * in the parent's key also needs to be reduced.  We assume here that the key
- * is not reduced to a value less than the first key of the next-lower leaf
- * page, since that would badly hose the btree.  If the parent's key is the
- * first one on the internal page that contains it, then we need to update
- * it's parent as well, and so on until we either reach the root of the btree
- * or reduce a key that's not the first one on the page.
- */
-static void
-FreePageBtreeReduceAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
-{
-       char *base = fpm_segment_base(fpm);
-       Size    first_page;
-       FreePageBtree *parent;
-       FreePageBtree *child;
-
-       Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
-       Assert(btp->hdr.nused > 0 && btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
-       first_page = btp->u.leaf_key[0].first_page;
-       child = btp;
-
-       for (;;)
-       {
-               Size    s;
-
-               parent = relptr_access(base, child->hdr.parent);
-               if (parent == NULL)
-                       break;
-               s = FreePageBtreeSearchInternal(parent, first_page);
-
-#ifdef USE_ASSERT_CHECKING
-               if (assert_enabled)
-               {
-                       FreePageBtree *check;
-
-                       Assert(s < parent->hdr.nused);
-                       check = relptr_access(base, parent->u.internal_key[s].child);
-                       Assert(child == check);
-               }
-#endif
-
-               parent->u.internal_key[s].first_page = first_page;
-               if (s > 0)
-                       break;
-               child = parent;
-       }
-}
-
 /*
  * Remove an item from the btree at the given position on the given page.
  */
@@ -705,7 +736,7 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
 
                /* If reducing first key on page, ancestors might need adjustment. */
                if (result.index_next == 0)
-                       FreePageBtreeReduceAncestorKeys(fpm, result.page_next);
+                       FreePageBtreeAdjustAncestorKeys(fpm, result.page_next);
 
                return true;
        }
@@ -787,5 +818,5 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
 
        /* If new first key on page, ancestors might need adjustment. */
        if (index == 0)
-               FreePageBtreeReduceAncestorKeys(fpm, result.page_next);
+               FreePageBtreeAdjustAncestorKeys(fpm, result.page_next);
 }