More hacking.
authorRobert Haas <rhaas@postgresql.org>
Thu, 27 Feb 2014 16:41:54 +0000 (11:41 -0500)
committerRobert Haas <rhaas@postgresql.org>
Thu, 27 Feb 2014 16:41:54 +0000 (11:41 -0500)
src/backend/utils/mmgr/freepage.c

index a891a039da6ccd64135525e706d7b34657a46c3f..d2f4474dfaee561dd6cb6bcef519ee6d090b2f16 100644 (file)
@@ -95,6 +95,7 @@ static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
 static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
 static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
                                        Size index);
+static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
 static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
                                        FreePageBtreeSearchResult *result);
 static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
@@ -244,7 +245,7 @@ FreePageManagerDump(FreePageManager *fpm)
        recycle = relptr_access(base, fpm->btree_recycle);
        if (recycle != NULL)
        {
-               appendStringInfo(&buf, "btree recycle: ");
+               appendStringInfo(&buf, "btree recycle:");
                FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
        }
 
@@ -367,12 +368,65 @@ FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
 }
 
 /*
- * Find the leaf page which follows the passed page in key order.
+ * Consider consolidating the given page with its left or right sibling,
+ * if it's fairly empty.
+ */
+static void
+FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
+{
+       char *base = fpm_segment_base(fpm);
+       FreePageBtree *np;
+       Size    max;
+
+       /*
+        * We only try to consolidate pages that are less than a third full.
+        * We could be more aggressive about this, but that might risk performing
+        * consolidation only to end up splitting again shortly thereafter.  Since
+        * the btree should be very small compared to the space under management,
+        * our goal isn't so much to ensure that it always occupies the absolutely
+        * smallest possible number of pages as to reclaim pages before things get
+        * too egregiously out of hand.
+        */
+       if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+               max = FPM_ITEMS_PER_LEAF_PAGE;
+       else
+       {
+               Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+               max = FPM_ITEMS_PER_INTERNAL_PAGE;
+       }
+       if (btp->hdr.nused >= max / 3)
+               return;
+
+       /*
+        * If we can fit our right sibling's keys onto this page, consolidate.
+        */
+       np = FreePageBtreeFindRightSibling(base, btp);
+       if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
+       {
+               if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+                       memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
+                                  sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
+               else
+                       memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
+                                  sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
+               btp->hdr.nused += np->hdr.nused;
+               FreePageBtreeRemovePage(fpm, np);
+       }
+
+       /*
+        * XXX. Check whether we can merge with our left sibling.
+        */
+}
+
+/*
+ * Find the passed page's right sibling; that is, the page at the same level
+ * of the tree whose keyspace immediately follows ours.
  */
 static FreePageBtree *
 FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
 {
-       Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
+       FreePageBtree *p = btp;
+       int             levels = 0;
 
        /* Move up until we can move right. */
        for (;;)
@@ -380,29 +434,32 @@ FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
                Size    first_page;
                Size    index;
 
-               if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
-                       first_page = btp->u.leaf_key[0].first_page;
-               else
-                       first_page = btp->u.internal_key[0].first_page;
-               btp = relptr_access(base, btp->hdr.parent);
+               p = relptr_access(base, p->hdr.parent);
 
-               if (btp == NULL)
+               if (p == NULL)
                        return NULL;            /* we were passed the rightmost page */
 
-               index = FreePageBtreeSearchInternal(btp, first_page);
-               if (index < btp->hdr.nused - 1)
+               first_page = FreePageBtreeFirstKey(p);
+               index = FreePageBtreeSearchInternal(p, first_page);
+               if (index < p->hdr.nused - 1)
                {
-                       btp = relptr_access(base, btp->u.internal_key[index + 1].child);
+                       p = relptr_access(base, p->u.internal_key[index + 1].child);
                        break;
                }
-               Assert(index == btp->hdr.nused - 1);
+               Assert(index == p->hdr.nused - 1);
+               ++levels;
        }
 
        /* Descend left. */
-       while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
-               btp = relptr_access(base, btp->u.internal_key[0].child);
+       while (levels > 0)
+       {
+               Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+               p = relptr_access(base, p->u.internal_key[0].child);
+               --levels;
+       }
+       Assert(p->hdr.magic == btp->hdr.magic);
 
-       return btp;
+       return p;
 }
 
 /*
@@ -411,6 +468,8 @@ FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
 static Size
 FreePageBtreeFirstKey(FreePageBtree *btp)
 {
+       Assert(btp->hdr.nused > 0);
+
        if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
                return btp->u.leaf_key[0].first_page;
        else
@@ -501,87 +560,101 @@ FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
 static void
 FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
 {
-       char *base = fpm_segment_base(fpm);
-       FreePageBtree *parent;
-       FreePageBtree *child;
-       Size    first_page;
-
        Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
        Assert(index < btp->hdr.nused);
 
-       /* 
-        * If there's more than one key remaining on the page, then things are
-        * pretty simple.  We need to physically remove the key from the page;
-        * and if it's the first key on the page, then we need to adjust its
-        * ancestor keys.  Then we're done.
-        */
-       if (btp->hdr.nused > 1)
+       /* When last item is removed, extirpate entire page from btree. */
+       if (btp->hdr.nused == 1)
        {
-               --btp->hdr.nused;
-               if (index < btp->hdr.nused)
-                       memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
-                                       sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
-               if (index == 0)
-                       FreePageBtreeAdjustAncestorKeys(fpm, btp);
-
-               /*
-                * XXX. We could try to consolidate the page with its left or right
-                * sibling, or some more complicated key redistribution scheme, to
-                * avoid ending up with lots of mostly-empty btree pages.
-                */
-
+               FreePageBtreeRemovePage(fpm, btp);
                return;
        }
 
-       /*
-        * We're removing the last key on the leaf page; this will require
-        * removing the downlink from the parent, which may in turn cause the
-        * parent to become empty.  We work our way up the tree until we reach
-        * a point where removing the key doesn't leave the tree empty, or until
-        * we reach the root.
-        */
-       first_page = btp->u.leaf_key[index].first_page;
-       child = btp;
+       /* Physically remove the key from the page. */
+       --btp->hdr.nused;
+       if (index < btp->hdr.nused)
+               memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
+                               sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
+
+       /* If we just removed the first key, adjust ancestor keys. */
+       if (index == 0)
+               FreePageBtreeAdjustAncestorKeys(fpm, btp);
+
+       /* Consider whether to consolidate this page with a sibling. */
+       FreePageBtreeConsolidate(fpm, btp);
+}
+
+/*
+ * Remove a page from the btree.  Caller is responsible for having relocated
+ * any keys from this page that are still wanted.  The page is placed on the
+ * recycled list.
+ */
+static void
+FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
+{
+       char *base = fpm_segment_base(fpm);
+       FreePageBtree *parent;
+
        for (;;)
        {
-               /* Find parent page. */
-               parent = relptr_access(base, child->hdr.parent);
+               Size    index;
+               Size    first_page;
 
-               /* Recycle child page. */
-               FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, child));
+               /* Find parent page. */
+               parent = relptr_access(base, btp->hdr.parent);
+               if (parent == NULL)
+               {
+                       /* We are removing the root page. */
+                       relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
+                       fpm->btree_depth = 0;
+                       Assert(fpm->singleton_first_page == 0);
+                       Assert(fpm->singleton_npages == 0);
+                       return;
+               }
 
-               /* Stop if we don't need to remove this page, or it's the root. */
-               if (parent == NULL || parent->hdr.nused > 1)
-                       break;
+               /* Find and remove the downlink. */
+               first_page = FreePageBtreeFirstKey(btp);
+               if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+               {
+                       index = FreePageBtreeSearchLeaf(parent, first_page);
+                       Assert(index < parent->hdr.nused);
+                       if (index < parent->hdr.nused - 1)
+                               memmove(&parent->u.leaf_key[index],
+                                               &parent->u.leaf_key[index + 1],
+                                               sizeof(FreePageBtreeLeafKey)
+                                                       * (parent->hdr.nused - index - 1));
+               }
+               else
+               {
+                       index = FreePageBtreeSearchInternal(parent, first_page);
+                       Assert(index < parent->hdr.nused);
+                       if (index < parent->hdr.nused - 1)
+                               memmove(&parent->u.internal_key[index],
+                                               &parent->u.internal_key[index + 1],
+                                               sizeof(FreePageBtreeInternalKey)
+                                                       * (btp->hdr.nused - index - 1));
+               }
+               parent->hdr.nused--;
 
-               /* Prepare to loop around again. */
-               child = parent;
-       }
+               /* Recycle target page. */
+               FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
 
-       /* Handle the case where we've just recycled the root. */
-       if (parent == NULL)
-       {
-               relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
-               fpm->btree_depth = 0;
-               Assert(fpm->singleton_first_page == 0);
-               Assert(fpm->singleton_npages == 0);
-               return;
+               /*
+                * If the parent has become empty in turn, we must loop around and
+                * remove it in turn.  If not, we can break out of the loop, but must
+                * adjust ancestor keys if we updated the first key on the page.
+                */
+               if (parent->hdr.nused > 0)
+               {
+                       if (index == 0)
+                               FreePageBtreeAdjustAncestorKeys(fpm, parent);
+                       break;
+               }
+               btp = parent;
        }
 
-       /*
-        * We've reached an internal page containing more than one key. Remove
-        * the downlink, and adjust ancestor keys as needed.
-        */
-       Assert(parent->hdr.nused > 1);
-       Assert(parent->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
-       index = FreePageBtreeSearchInternal(parent, first_page);
-       Assert(parent->u.internal_key[index].first_page == first_page);
-       --parent->hdr.nused;
-       if (index < parent->hdr.nused)
-               memmove(&btp->u.internal_key[index], &btp->u.internal_key[index + 1],
-                               sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
-       if (index == 0)
-               FreePageBtreeAdjustAncestorKeys(fpm, parent);
+       /* Consider whether to consolidate the parent with a sibings. */
+       FreePageBtreeConsolidate(fpm, parent);
 }
 
 /*