Add FreePageBtreeCleanup and left-sibling consolidation to freepage.c.
author      Robert Haas <rhaas@postgresql.org>
            Thu, 27 Feb 2014 18:07:28 +0000 (13:07 -0500)
committer   Robert Haas <rhaas@postgresql.org>
            Thu, 27 Feb 2014 18:07:28 +0000 (13:07 -0500)
src/backend/utils/mmgr/freepage.c

index d2f4474dfaee561dd6cb6bcef519ee6d090b2f16..cdcea73c368e6778ad5d9f1332eff954fa1814a2 100644
@@ -84,6 +84,9 @@ typedef struct FreePageBtreeSearchResult
 /* Helper functions */
 static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
                                        FreePageBtree *btp);
+static void FreePageBtreeCleanup(FreePageManager *fpm);
+static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
+                                                        FreePageBtree *btp);
 static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
                                                          FreePageBtree *btp);
 static Size FreePageBtreeFirstKey(FreePageBtree *btp);
@@ -172,7 +175,7 @@ FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
        if (lock != NULL)
                LWLockAcquire(lock, LW_EXCLUSIVE);
        result = FreePageManagerGetInternal(fpm, npages, first_page);
-       /* XXX. Try to softly PutInternal recycled pages? */
+       FreePageBtreeCleanup(fpm);
        if (lock != NULL)
                LWLockRelease(lock);
 
@@ -193,8 +196,7 @@ FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
                LWLockAcquire(lock, LW_EXCLUSIVE);
 
        FreePageManagerPutInternal(fpm, first_page, npages, false);
-
-       /* XXX. Try to softly PutInternal recycled pages? */
+       FreePageBtreeCleanup(fpm);
 
        /* Release lock (if there is one). */
        if (lock != NULL)
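
Both call sites now follow the same sequence: take the manager's lock if it has one, do the real work, then let FreePageBtreeCleanup reclaim btree pages before the lock is dropped. A minimal sketch of that pattern, on the assumption that cleanup must see the same consistent shared state as the operation before it (the wrapper name below is illustrative, not part of freepage.c):

    static void
    fpm_op_with_cleanup(FreePageManager *fpm, LWLock *lock)
    {
        /* Acquire lock (if there is one). */
        if (lock != NULL)
            LWLockAcquire(lock, LW_EXCLUSIVE);

        /* ... FreePageManagerGetInternal() or FreePageManagerPutInternal() ... */

        /*
         * The cleanup pass mutates the shared btree and recycle list, so it
         * piggy-backs on the exclusive lock already held here.
         */
        FreePageBtreeCleanup(fpm);

        /* Release lock (if there is one). */
        if (lock != NULL)
            LWLockRelease(lock);
    }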
@@ -367,6 +369,74 @@ FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
        }
 }
 
+/*
+ * Attempt to reclaim space from the free-page btree.
+ */
+static void
+FreePageBtreeCleanup(FreePageManager *fpm)
+{
+       char *base = fpm_segment_base(fpm);
+
+       /* Attempt to shrink the depth of the btree. */
+       while (!relptr_is_null(fpm->btree_root))
+       {
+               FreePageBtree *root = relptr_access(base, fpm->btree_root);
+
+               /* Can't do anything if the root has multiple keys. */
+               if (root->hdr.nused > 1)
+                       break;
+
+               /* Root should never be empty. */
+               Assert(root->hdr.nused == 1);
+
+               /* Shrink depth of tree by one. */
+               Assert(fpm->btree_depth > 0);
+               --fpm->btree_depth;
+               if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+               {
+                       /* If root is a leaf, convert only entry to singleton range. */
+                       relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
+                       fpm->singleton_first_page = root->u.leaf_key[0].first_page;
+                       fpm->singleton_npages = root->u.leaf_key[0].npages;
+               }
+               else
+               {
+                       FreePageBtree *newroot;
+
+                       /* If root is an internal page, make only child the root. */
+                       Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+                       relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
+                       newroot = relptr_access(base, fpm->btree_root);
+                       relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
+               }
+               FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
+       }
+
+       /*
+        * Attempt to free recycled btree pages.  We skip this if releasing
+        * the recycled page would require a btree page split, because the page
+        * we're trying to recycle would be consumed by the split, which would
+        * be counterproductive.
+        *
+        * We release recycled pages one at a time, stopping at the first
+        * page whose insertion fails; that page is pushed back onto the
+        * recycle list.  A more aggressive strategy is possible, but it's
+        * not clear that the complexity would be worthwhile.
+        */
+       while (fpm->btree_recycle_count > 0)
+       {
+               FreePageBtree *btp;
+               Size    first_page;
+
+               btp = FreePageBtreeGetRecycled(fpm);
+               first_page = fpm_pointer_to_page(base, btp);
+               if (!FreePageManagerPutInternal(fpm, first_page, 1, true))
+               {
+                       FreePageBtreeRecycle(fpm, first_page);
+                       break;
+               }
+       }
+}
+
 /*
  * Consider consolidating the given page with its left or right sibling,
  * if it's fairly empty.
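
The shrink loop above removes one btree level per iteration: a single-key leaf root collapses into the manager's singleton range, while a single-key internal root hands the job to its only child. Here is a toy model of that step, using ordinary pointers where freepage.c resolves relptr offsets against the segment base; every name below is illustrative, not the real freepage.c API:

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct ToyNode
    {
        int             nused;          /* number of keys on this node */
        bool            is_leaf;
        struct ToyNode *only_child;     /* first downlink, internal nodes only */
        struct ToyNode *parent;
    } ToyNode;

    /* Collapse single-key roots, one level per iteration. */
    static ToyNode *
    toy_shrink_root(ToyNode *root, int *depth)
    {
        while (root != NULL && root->nused == 1)
        {
            --*depth;
            if (root->is_leaf)
                root = NULL;            /* entry lives on as a singleton range */
            else
            {
                root = root->only_child;    /* promote the only child */
                root->parent = NULL;
            }
            /* the old root page would be pushed onto the recycle list here */
        }
        return root;
    }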
@@ -411,11 +481,71 @@ FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
                                   sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
                btp->hdr.nused += np->hdr.nused;
                FreePageBtreeRemovePage(fpm, np);
+               return;
        }
 
        /*
-        * XXX. Check whether we can merge with our left sibling.
+        * If we can fit our keys onto our left sibling's page, consolidate.
+        * In this case, we move our keys onto the other page rather than vice
+        * versa, to avoid having to adjust ancestor keys.
         */
+       np = FreePageBtreeFindLeftSibling(base, btp);
+       if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
+       {
+               if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+                       memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
+                                  sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
+               else
+                       memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
+                                  sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
+               np->hdr.nused += btp->hdr.nused;
+               FreePageBtreeRemovePage(fpm, btp);
+               return;
+       }
+}
+
+/*
+ * Find the passed page's left sibling; that is, the page at the same level
+ * of the tree whose keyspace immediately precedes ours.
+ */
+static FreePageBtree *
+FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
+{
+       FreePageBtree *p = btp;
+       int             levels = 0;
+
+       /* Move up until we can move left. */
+       for (;;)
+       {
+               Size    first_page;
+               Size    index;
+
+               /* Note our first key before climbing, so we can find our slot. */
+               first_page = FreePageBtreeFirstKey(p);
+               p = relptr_access(base, p->hdr.parent);
+
+               if (p == NULL)
+                       return NULL;            /* we were passed the leftmost page */
+
+               index = FreePageBtreeSearchInternal(p, first_page);
+               if (index > 0)
+               {
+                       p = relptr_access(base, p->u.internal_key[index - 1].child);
+                       break;
+               }
+               Assert(index == 0);
+               ++levels;
+       }
+
+       /* Descend left. */
+       while (levels > 0)
+       {
+               Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+               p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
+               --levels;
+       }
+       Assert(p->hdr.magic == btp->hdr.magic);
+
+       return p;
 }
 
 /*
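
FreePageBtreeFindLeftSibling climbs until some ancestor lets it step one slot to the left, then walks back down that many levels along rightmost children. The same shape in miniature, with plain pointers and an explicit child array standing in for the key-search-based slot lookup above (all names illustrative):

    #include <stddef.h>

    #define TOY_FANOUT 8

    typedef struct Node
    {
        int          nkeys;                 /* also the number of downlinks */
        struct Node *parent;
        struct Node *child[TOY_FANOUT];     /* internal nodes only */
    } Node;

    static Node *
    find_left_sibling(Node *n)
    {
        Node   *p = n;
        int     levels = 0;

        /* Move up until we can move left. */
        for (;;)
        {
            Node   *up = p->parent;
            int     slot = 0;

            if (up == NULL)
                return NULL;                /* n is leftmost at its level */
            while (up->child[slot] != p)
                ++slot;                     /* find p's slot in its parent */
            if (slot > 0)
            {
                p = up->child[slot - 1];    /* step left once, then descend */
                break;
            }
            p = up;
            ++levels;                       /* leftmost here; climb and retry */
        }

        /* Descend along rightmost children back to the starting level. */
        while (levels-- > 0)
            p = p->child[p->nkeys - 1];
        return p;
    }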
@@ -1232,10 +1362,14 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
        if (result.split_pages > 0)
        {
                /*
-                * XXX. Try any coping strategies we want to use to avoid a split,
-                * such as inserting to np if np != result.page, or shuffling
-                * keys between siblings.  If any of those strategies work, make
-                * sure to update result.split_pages, or just return.
+                * NB: We could consider various coping strategies here to avoid a
+                * split; most obviously, if np != result.page, we could target that
+                * page instead.  More complicated key-shuffling strategies are
+                * possible too: unless every single leaf page is 100% full, we can
+                * always jam this key in somewhere if we try hard enough.  Trying
+                * that hard is unlikely to be worthwhile, though some modest effort
+                * may eventually prove necessary.  For now, we do the easy thing,
+                * which is nothing.
                 */
 
                /* If this is a soft insert, it's time to give up. */
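
The "soft" flag that FreePageBtreeCleanup passes to FreePageManagerPutInternal is what keeps the recycle loop from defeating itself: if reinserting a recycled page's address would itself force a split, the put fails and the page goes back on the recycle list. A toy version of that contract, with a fixed-capacity leaf standing in for the real btree (everything below is illustrative):

    #include <stdbool.h>
    #include <stddef.h>

    #define TOY_LEAF_CAPACITY 4

    typedef struct ToyLeaf
    {
        size_t  first_page[TOY_LEAF_CAPACITY];
        int     nused;
    } ToyLeaf;

    /* A soft put changes nothing and fails where a split would be needed. */
    static bool
    toy_put_soft(ToyLeaf *leaf, size_t page)
    {
        if (leaf->nused >= TOY_LEAF_CAPACITY)
            return false;               /* would split; give up instead */
        leaf->first_page[leaf->nused++] = page;
        return true;
    }

    /* Mirrors the cleanup loop: flush recycled pages until one would split. */
    static void
    toy_cleanup(ToyLeaf *leaf, size_t *recycled, int *nrecycled)
    {
        while (*nrecycled > 0)
        {
            size_t  page = recycled[*nrecycled - 1];    /* peek at the top */

            if (!toy_put_soft(leaf, page))
                break;                  /* leave it listed, like the re-Recycle */
            --*nrecycled;               /* success: actually remove it */
        }
    }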