Widen MultiXactOffset to 64 bits

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)
diff --git a/doc/src/sgml/ref/pg_resetwal.sgml b/doc/src/sgml/ref/pg_resetwal.sgml

index 2c019c2aac6eb9170003a4fa082cefc09864cb7e..41f2b1d480c513ea30efcf9f38c25964cefd5c7e 100644 (file)
--- a/doc/src/sgml/ref/pg_resetwal.sgml
+++ b/doc/src/sgml/ref/pg_resetwal.sgml
@@ -267,14 +267,17 @@ PostgreSQL documentation
        A safe value for the next multitransaction ID (first part) can be
        determined by looking for the numerically largest file name in the
        directory <filename>pg_multixact/offsets</filename> under the data directory,
-      adding one, and then multiplying by 65536 (0x10000).  Conversely, a safe
+      adding one, and then multiplying by 32768 (0x8000).  Conversely, a safe
        value for the oldest multitransaction ID (second part of
        <option>-m</option>) can be determined by looking for the numerically smallest
-      file name in the same directory and multiplying by 65536.  The file
-      names are in hexadecimal, so the easiest way to do this is to specify
-      the option value in hexadecimal and append four zeroes.
+      file name in the same directory and multiplying by 32768 (0x8000).
+      Note that the file names are in hexadecimal.  It is usually easiest
+      to specify the option value in hexadecimal too.  For example, if
+      <filename>000F</filename> and <filename>0007</filename> are the greatest and
+      smallest entries in <filename>pg_multixact/offsets</filename>,
+      <literal>-m 0x80000,0x38000</literal> will work.
       </para>
-     <!-- 65536 = SLRU_PAGES_PER_SEGMENT * BLCKSZ / sizeof(MultiXactOffset) -->
+     <!-- 32768 = SLRU_PAGES_PER_SEGMENT * BLCKSZ / sizeof(MultiXactOffset) -->
      </listitem>
     </varlistentry>
  
diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c

index 3ca0582db3647787e9cf69f1c6ae8afccbd1084f..052dd0a4ce56d490e86eeed8b15e046343a77157 100644 (file)
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,7 +65,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
         xl_multixact_create *xlrec = (xl_multixact_create *) rec;
         int         i;
  
-       appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid,
+       appendStringInfo(buf, "%u offset %" PRIu64 " nmembers %d: ", xlrec->mid,
                          xlrec->moff, xlrec->nmembers);
         for (i = 0; i < xlrec->nmembers; i++)
             out_member(buf, &xlrec->members[i]);
@@ -74,7 +74,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
     {
         xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
  
-       appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)",
+       appendStringInfo(buf, "offsets [%u, %u), members [%" PRIu64 ", %" PRIu64 ")",
                          xlrec->startTruncOff, xlrec->endTruncOff,
                          xlrec->startTruncMemb, xlrec->endTruncMemb);
     }
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c

index cd6c2a2f650a68f467e704ca88e8b719a0f9e70c..441034f5929cf1de544c07686a0512d6b3cb4eac 100644 (file)
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -66,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
         CheckPoint *checkpoint = (CheckPoint *) rec;
  
         appendStringInfo(buf, "redo %X/%08X; "
-                        "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
+                        "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %" PRIu64 "; "
                          "oldest xid %u in DB %u; oldest multi %u in DB %u; "
                          "oldest/newest commit timestamp xid: %u/%u; "
                          "oldest running xid %u; %s",
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c

index 14d46fb761b4e9cfc490cfa66720111c8e3a5183..72a4e50852a4b85642680a82618465297605b9bb 100644 (file)
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -89,10 +89,14 @@
  #include "utils/memutils.h"
  
  
-/* Multixact members wraparound thresholds. */
-#define MULTIXACT_MEMBER_SAFE_THRESHOLD        (MaxMultiXactOffset / 2)
-#define MULTIXACT_MEMBER_DANGER_THRESHOLD  \
-   (MaxMultiXactOffset - MaxMultiXactOffset / 4)
+/*
+ * Thresholds used to keep members disk usage in check when multixids have a
+ * lot of members.  When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
+ * starts freezing multixids more aggressively, even if the normal multixid
+ * age limits haven't been reached yet.
+ */
+#define MULTIXACT_MEMBER_LOW_THRESHOLD     UINT64CONST(2000000000)
+#define MULTIXACT_MEMBER_HIGH_THRESHOLD        UINT64CONST(4000000000)
  
  static inline MultiXactId
  PreviousMultiXactId(MultiXactId multi)
@@ -137,11 +141,9 @@ typedef struct MultiXactStateData
  
     /*
      * Oldest multixact offset that is potentially referenced by a multixact
-    * referenced by a relation.  We don't always know this value, so there's
-    * a flag here to indicate whether or not we currently do.
+    * referenced by a relation.
      */
     MultiXactOffset oldestOffset;
-   bool        oldestOffsetKnown;
  
     /* support for anti-wraparound measures */
     MultiXactId multiVacLimit;
@@ -149,9 +151,6 @@ typedef struct MultiXactStateData
     MultiXactId multiStopLimit;
     MultiXactId multiWrapLimit;
  
-   /* support for members anti-wraparound measures */
-   MultiXactOffset offsetStopLimit;    /* known if oldestOffsetKnown */
-
     /*
      * Per-backend data starts here.  We have two arrays stored in the area
      * immediately following the MultiXactStateData struct. Each is indexed by
@@ -272,13 +271,9 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
  /* management of SLRU infrastructure */
  static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
  static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
-static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
-                                   MultiXactOffset offset2);
  static void ExtendMultiXactOffset(MultiXactId multi);
  static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
-                                    MultiXactOffset start, uint32 distance);
-static bool SetOffsetVacuumLimit(bool is_startup);
+static void SetOldestOffset(void);
  static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
  static void WriteMTruncateXlogRec(Oid oldestMultiDB,
                                   MultiXactId startTruncOff,
@@ -1073,90 +1068,22 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
     ExtendMultiXactOffset(result + 1);
  
     /*
-    * Reserve the members space, similarly to above.  Also, be careful not to
-    * return zero as the starting offset for any multixact. See
-    * GetMultiXactIdMembers() for motivation.
+    * Reserve the members space, similarly to above.
      */
     nextOffset = MultiXactState->nextOffset;
-   if (nextOffset == 0)
-   {
-       *offset = 1;
-       nmembers++;             /* allocate member slot 0 too */
-   }
-   else
-       *offset = nextOffset;
-
-   /*----------
-    * Protect against overrun of the members space as well, with the
-    * following rules:
-    *
-    * If we're past offsetStopLimit, refuse to generate more multis.
-    * If we're close to offsetStopLimit, emit a warning.
-    *
-    * Arbitrarily, we start emitting warnings when we're 20 segments or less
-    * from offsetStopLimit.
-    *
-    * Note we haven't updated the shared state yet, so if we fail at this
-    * point, the multixact ID we grabbed can still be used by the next guy.
-    *
-    * Note that there is no point in forcing autovacuum runs here: the
-    * multixact freeze settings would have to be reduced for that to have any
-    * effect.
-    *----------
-    */
-#define OFFSET_WARN_SEGMENTS   20
-   if (MultiXactState->oldestOffsetKnown &&
-       MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
-                                nmembers))
-   {
-       /* see comment in the corresponding offsets wraparound case */
-       SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
-       ereport(ERROR,
-               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                errmsg("multixact \"members\" limit exceeded"),
-                errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
-                                 "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
-                                 MultiXactState->offsetStopLimit - nextOffset - 1,
-                                 nmembers,
-                                 MultiXactState->offsetStopLimit - nextOffset - 1),
-                errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
-                        MultiXactState->oldestMultiXactDB)));
-   }
  
     /*
-    * Check whether we should kick autovacuum into action, to prevent members
-    * wraparound. NB we use a much larger window to trigger autovacuum than
-    * just the warning limit. The warning is just a measure of last resort -
-    * this is in line with GetNewTransactionId's behaviour.
+    * Offsets are 64-bit integers and will never wrap around.  Firstly, it
+    * would take an unrealistic amount of time and resources to consume 2^64
+    * offsets.  Secondly, multixid creation is WAL-logged, so you would run
+    * out of LSNs before reaching offset wraparound.  Nevertheless, check for
+    * wraparound as a sanity check.
      */
-   if (!MultiXactState->oldestOffsetKnown ||
-       (MultiXactState->nextOffset - MultiXactState->oldestOffset
-        > MULTIXACT_MEMBER_SAFE_THRESHOLD))
-   {
-       /*
-        * To avoid swamping the postmaster with signals, we issue the autovac
-        * request only when crossing a segment boundary. With default
-        * compilation settings that's roughly after 50k members.  This still
-        * gives plenty of chances before we get into real trouble.
-        */
-       if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
-           (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
-           SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-   }
-
-   if (MultiXactState->oldestOffsetKnown &&
-       MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
-                                nextOffset,
-                                nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
-       ereport(WARNING,
+   if (nextOffset + nmembers < nextOffset)
+       ereport(ERROR,
                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
-                              "database with OID %u must be vacuumed before %d more multixact members are used",
-                              MultiXactState->offsetStopLimit - nextOffset + nmembers,
-                              MultiXactState->oldestMultiXactDB,
-                              MultiXactState->offsetStopLimit - nextOffset + nmembers),
-                errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
+                errmsg("MultiXact members would wrap around")));
+   *offset = nextOffset;
  
     ExtendMultiXactMember(nextOffset, nmembers);
  
@@ -1177,8 +1104,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
      * the next iteration.  But note that nextMXact may be InvalidMultiXactId
      * or the first value on a segment-beginning page after this routine
      * exits, so anyone else looking at the variable must be prepared to deal
-    * with either case.  Similarly, nextOffset may be zero, but we won't use
-    * that as the actual start offset of the next multixact.
+    * with either case.
      */
     (MultiXactState->nextMXact)++;
  
@@ -1186,7 +1112,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
  
     LWLockRelease(MultiXactGenLock);
  
-   debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
+   debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
+               result, *offset);
     return result;
  }
  
@@ -1228,7 +1155,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
     MultiXactOffset *offptr;
     MultiXactOffset offset;
     int         length;
-   int         truelength;
     MultiXactId oldestMXact;
     MultiXactId nextMXact;
     MultiXactMember *ptr;
@@ -1304,16 +1230,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
      * Find out the offset at which we need to start reading MultiXactMembers
      * and the number of members in the multixact.  We determine the latter as
      * the difference between this multixact's starting offset and the next
-    * one's.  However, there is one corner case to worry about:
-    *
-    * Because GetNewMultiXactId skips over offset zero, to reserve zero for
-    * to mean "unset", there is an ambiguity near the point of offset
-    * wraparound.  If we see next multixact's offset is one, is that our
-    * multixact's actual endpoint, or did it end at zero with a subsequent
-    * increment?  We handle this using the knowledge that if the zero'th
-    * member slot wasn't filled, it'll contain zero, and zero isn't a valid
-    * transaction ID so it can't be a multixact member.  Therefore, if we
-    * read a zero from the members array, just ignore it.
+    * one's.
      */
     pageno = MultiXactIdToOffsetPage(multi);
     entryno = MultiXactIdToOffsetEntry(multi);
@@ -1380,10 +1297,11 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
     LWLockRelease(lock);
     lock = NULL;
  
+   /* A multixid with zero members should not happen */
+   Assert(length > 0);
+
     /* read the members */
     ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
-
-   truelength = 0;
     prev_pageno = -1;
     for (int i = 0; i < length; i++, offset++)
     {
@@ -1420,37 +1338,27 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
  
         xactptr = (TransactionId *)
             (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
-
-       if (!TransactionIdIsValid(*xactptr))
-       {
-           /* Corner case: we must be looking at unused slot zero */
-           Assert(offset == 0);
-           continue;
-       }
+       Assert(TransactionIdIsValid(*xactptr));
  
         flagsoff = MXOffsetToFlagsOffset(offset);
         bshift = MXOffsetToFlagsBitShift(offset);
         flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
  
-       ptr[truelength].xid = *xactptr;
-       ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
-       truelength++;
+       ptr[i].xid = *xactptr;
+       ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
     }
  
     LWLockRelease(lock);
  
-   /* A multixid with zero members should not happen */
-   Assert(truelength > 0);
-
     /*
      * Copy the result into the local cache.
      */
-   mXactCachePut(multi, truelength, ptr);
+   mXactCachePut(multi, length, ptr);
  
     debug_elog3(DEBUG2, "GetMembers: no cache for %s",
-               mxid_to_string(multi, truelength, ptr));
+               mxid_to_string(multi, length, ptr));
     *members = ptr;
-   return truelength;
+   return length;
  }
  
  /*
@@ -1857,7 +1765,7 @@ MultiXactShmemInit(void)
                   "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
                   LWTRANCHE_MULTIXACTMEMBER_SLRU,
                   SYNC_HANDLER_MULTIXACT_MEMBER,
-                 false);
+                 true);
     /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
  
     /* Initialize our shared state struct */
@@ -1912,48 +1820,6 @@ BootStrapMultiXact(void)
     SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
  }
  
-/*
- * MaybeExtendOffsetSlru
- *     Extend the offsets SLRU area, if necessary
- *
- * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
- * contain files that are shorter than necessary; this would occur if the old
- * installation had used multixacts beyond the first page (files cannot be
- * copied, because the on-disk representation is different).  pg_upgrade would
- * update pg_control to set the next offset value to be at that position, so
- * that tuples marked as locked by such MultiXacts would be seen as visible
- * without having to consult multixact.  However, trying to create and use a
- * new MultiXactId would result in an error because the page on which the new
- * value would reside does not exist.  This routine is in charge of creating
- * such pages.
- */
-static void
-MaybeExtendOffsetSlru(void)
-{
-   int64       pageno;
-   LWLock     *lock;
-
-   pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
-   lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
-
-   LWLockAcquire(lock, LW_EXCLUSIVE);
-
-   if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
-   {
-       int         slotno;
-
-       /*
-        * Fortunately for us, SimpleLruWritePage is already prepared to deal
-        * with creating a new segment file even if the page we're writing is
-        * not the first in it, so this is enough.
-        */
-       slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-       SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-   }
-
-   LWLockRelease(lock);
-}
-
  /*
   * This must be called ONCE during postmaster or standalone-backend startup.
   *
@@ -2092,8 +1958,8 @@ TrimMultiXact(void)
     MultiXactState->finishedStartup = true;
     LWLockRelease(MultiXactGenLock);
  
-   /* Now compute how far away the next members wraparound is. */
-   SetMultiXactIdLimit(oldestMXact, oldestMXactDB, true);
+   /* Now compute how far away the next multixid wraparound is. */
+   SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
  }
  
  /*
@@ -2114,7 +1980,7 @@ MultiXactGetCheckptMulti(bool is_shutdown,
     LWLockRelease(MultiXactGenLock);
  
     debug_elog6(DEBUG2,
-               "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
+               "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
                 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
  }
  
@@ -2149,26 +2015,12 @@ void
  MultiXactSetNextMXact(MultiXactId nextMulti,
                       MultiXactOffset nextMultiOffset)
  {
-   debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
+   debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
                 nextMulti, nextMultiOffset);
     LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
     MultiXactState->nextMXact = nextMulti;
     MultiXactState->nextOffset = nextMultiOffset;
     LWLockRelease(MultiXactGenLock);
-
-   /*
-    * During a binary upgrade, make sure that the offsets SLRU is large
-    * enough to contain the next value that would be created.
-    *
-    * We need to do this pretty early during the first startup in binary
-    * upgrade mode: before StartupMultiXact() in fact, because this routine
-    * is called even before that by StartupXLOG().  And we can't do it
-    * earlier than at this point, because during that first call of this
-    * routine we determine the MultiXactState->nextMXact value that
-    * MaybeExtendOffsetSlru needs.
-    */
-   if (IsBinaryUpgrade)
-       MaybeExtendOffsetSlru();
  }
  
  /*
@@ -2176,28 +2028,24 @@ MultiXactSetNextMXact(MultiXactId nextMulti,
   * datminmxid (ie, the oldest MultiXactId that might exist in any database
   * of our cluster), and the OID of the (or a) database with that value.
   *
- * is_startup is true when we are just starting the cluster, false when we
- * are updating state in a running cluster.  This only affects log messages.
+ * This also updates MultiXactState->oldestOffset, by looking up the offset of
+ * MultiXactState->oldestMultiXactId.
   */
  void
-SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
-                   bool is_startup)
+SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
  {
     MultiXactId multiVacLimit;
     MultiXactId multiWarnLimit;
     MultiXactId multiStopLimit;
     MultiXactId multiWrapLimit;
     MultiXactId curMulti;
-   bool        needs_offset_vacuum;
  
     Assert(MultiXactIdIsValid(oldest_datminmxid));
  
     /*
      * We pretend that a wrap will happen halfway through the multixact ID
      * space, but that's not really true, because multixacts wrap differently
-    * from transaction IDs.  Note that, separately from any concern about
-    * multixact IDs wrapping, we must ensure that multixact members do not
-    * wrap.  Limits for that are set in SetOffsetVacuumLimit, not here.
+    * from transaction IDs.
      */
     multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
     if (multiWrapLimit < FirstMultiXactId)
@@ -2265,8 +2113,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
  
     Assert(!InRecovery);
  
-   /* Set limits for offset vacuum. */
-   needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
+   /*
+    * Offsets are 64 bits wide and never wrap around, so we don't need to
+    * consider them for emergency autovacuum purposes.  But now that we're in
+    * a consistent state, determine MultiXactState->oldestOffset.  It will be
+    * used to adjust the freezing cutoff, to keep the members disk usage in
+    * check.
+    */
+   SetOldestOffset();
  
     /*
      * If past the autovacuum force point, immediately signal an autovac
@@ -2275,8 +2129,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
      * database, it'll call here, and we'll signal the postmaster to start
      * another iteration immediately if there are still any old databases.
      */
-   if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
-        needs_offset_vacuum) && IsUnderPostmaster)
+   if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
         SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
  
     /* Give an immediate warning if past the wrap warn point */
@@ -2338,9 +2191,9 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
         debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
         MultiXactState->nextMXact = minMulti;
     }
-   if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
+   if (MultiXactState->nextOffset < minMultiOffset)
     {
-       debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
+       debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIu64,
                     minMultiOffset);
         MultiXactState->nextOffset = minMultiOffset;
     }
@@ -2359,7 +2212,7 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
     Assert(InRecovery);
  
     if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
-       SetMultiXactIdLimit(oldestMulti, oldestMultiDB, false);
+       SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
  }
  
  /*
@@ -2442,27 +2295,11 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
             LWLockRelease(lock);
         }
  
-       /*
-        * Compute the number of items till end of current page.  Careful: if
-        * addition of unsigned ints wraps around, we're at the last page of
-        * the last segment; since that page holds a different number of items
-        * than other pages, we need to do it differently.
-        */
-       if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
-       {
-           /*
-            * This is the last page of the last segment; we can compute the
-            * number of items left to allocate in it without modulo
-            * arithmetic.
-            */
-           difference = MaxMultiXactOffset - offset + 1;
-       }
-       else
-           difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+       /* Compute the number of items till end of current page. */
+       difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
  
         /*
-        * Advance to next page, taking care to properly handle the wraparound
-        * case.  OK if nmembers goes negative.
+        * Advance to next page.  OK if nmembers goes negative.
          */
         nmembers -= difference;
         offset += difference;
@@ -2524,28 +2361,17 @@ GetOldestMultiXactId(void)
  }
  
  /*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
- *
- * To do so determine what's the oldest member offset and install the limit
- * info in MultiXactState, where it can be used to prevent overrun of old data
- * in the members SLRU area.
- *
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * Calculate the oldest member offset and install it in MultiXactState, where
+ * it can be used to adjust multixid freezing cutoffs.
   */
-static bool
-SetOffsetVacuumLimit(bool is_startup)
+static void
+SetOldestOffset(void)
  {
     MultiXactId oldestMultiXactId;
     MultiXactId nextMXact;
     MultiXactOffset oldestOffset = 0;   /* placate compiler */
-   MultiXactOffset prevOldestOffset;
     MultiXactOffset nextOffset;
     bool        oldestOffsetKnown = false;
-   bool        prevOldestOffsetKnown;
-   MultiXactOffset offsetStopLimit = 0;
-   MultiXactOffset prevOffsetStopLimit;
  
     /*
      * NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2558,9 +2384,6 @@ SetOffsetVacuumLimit(bool is_startup)
     oldestMultiXactId = MultiXactState->oldestMultiXactId;
     nextMXact = MultiXactState->nextMXact;
     nextOffset = MultiXactState->nextOffset;
-   prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
-   prevOldestOffset = MultiXactState->oldestOffset;
-   prevOffsetStopLimit = MultiXactState->offsetStopLimit;
     Assert(MultiXactState->finishedStartup);
     LWLockRelease(MultiXactGenLock);
  
@@ -2583,121 +2406,39 @@ SetOffsetVacuumLimit(bool is_startup)
     else
     {
         /*
-        * Figure out where the oldest existing multixact's offsets are
-        * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
-        * the supposedly-earliest multixact might not really exist.  We are
-        * careful not to fail in that case.
+        * Look up the offset at which the oldest existing multixact's members
+        * are stored.  If we cannot find it, be careful not to fail, and
+        * leave oldestOffset unchanged.  oldestOffset is initialized to zero
+        * at system startup, which prevents truncating members until a proper
+        * value is calculated.
+        *
+        * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
+        * the supposedly-earliest multixact might not really exist.  Those
+        * should be long gone by now, so this should not fail, but let's
+        * still be defensive.)
          */
         oldestOffsetKnown =
             find_multixact_start(oldestMultiXactId, &oldestOffset);
  
         if (oldestOffsetKnown)
             ereport(DEBUG1,
-                   (errmsg_internal("oldest MultiXactId member is at offset %u",
+                   (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
                                      oldestOffset)));
         else
             ereport(LOG,
-                   (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
+                   (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
                             oldestMultiXactId)));
     }
  
     LWLockRelease(MultiXactTruncationLock);
  
-   /*
-    * If we can, compute limits (and install them MultiXactState) to prevent
-    * overrun of old data in the members SLRU area. We can only do so if the
-    * oldest offset is known though.
-    */
+   /* Install the computed value */
     if (oldestOffsetKnown)
     {
-       /* move back to start of the corresponding segment */
-       offsetStopLimit = oldestOffset - (oldestOffset %
-                                         (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
-       /* always leave one segment before the wraparound point */
-       offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
-       if (!prevOldestOffsetKnown && !is_startup)
-           ereport(LOG,
-                   (errmsg("MultiXact member wraparound protections are now enabled")));
-
-       ereport(DEBUG1,
-               (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
-                                offsetStopLimit, oldestMultiXactId)));
-   }
-   else if (prevOldestOffsetKnown)
-   {
-       /*
-        * If we failed to get the oldest offset this time, but we have a
-        * value from a previous pass through this function, use the old
-        * values rather than automatically forcing an emergency autovacuum
-        * cycle again.
-        */
-       oldestOffset = prevOldestOffset;
-       oldestOffsetKnown = true;
-       offsetStopLimit = prevOffsetStopLimit;
+       LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+       MultiXactState->oldestOffset = oldestOffset;
+       LWLockRelease(MultiXactGenLock);
     }
-
-   /* Install the computed values */
-   LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
-   MultiXactState->oldestOffset = oldestOffset;
-   MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
-   MultiXactState->offsetStopLimit = offsetStopLimit;
-   LWLockRelease(MultiXactGenLock);
-
-   /*
-    * Do we need an emergency autovacuum?  If we're not sure, assume yes.
-    */
-   return !oldestOffsetKnown ||
-       (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name.  The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
-                        uint32 distance)
-{
-   MultiXactOffset finish;
-
-   /*
-    * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
-    * if the addition wraps around the UINT_MAX boundary, skip that value.
-    */
-   finish = start + distance;
-   if (finish < start)
-       finish++;
-
-   /*-----------------------------------------------------------------------
-    * When the boundary is numerically greater than the starting point, any
-    * value numerically between the two is not wrapped:
-    *
-    *  <----S----B---->
-    *  [---)            = F wrapped past B (and UINT_MAX)
-    *       [---)       = F not wrapped
-    *            [----] = F wrapped past B
-    *
-    * When the boundary is numerically less than the starting point (i.e. the
-    * UINT_MAX wraparound occurs somewhere in between) then all values in
-    * between are wrapped:
-    *
-    *  <----B----S---->
-    *  [---)            = F not wrapped past B (but wrapped past UINT_MAX)
-    *       [---)       = F wrapped past B (and UINT_MAX)
-    *            [----] = F not wrapped
-    *-----------------------------------------------------------------------
-    */
-   if (start < boundary)
-       return finish >= boundary || finish < start;
-   else
-       return finish >= boundary && finish < start;
  }
  
  /*
@@ -2751,37 +2492,23 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
   * members: Number of member entries (nextOffset - oldestOffset)
   * oldestMultiXactId: Oldest MultiXact ID still in use
   * oldestOffset: Oldest offset still in use
- *
- * Returns false if unable to determine, the oldest offset being unknown.
   */
-bool
+void
  GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
                  MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
  {
     MultiXactOffset nextOffset;
     MultiXactId nextMultiXactId;
-   bool        oldestOffsetKnown;
  
     LWLockAcquire(MultiXactGenLock, LW_SHARED);
     nextOffset = MultiXactState->nextOffset;
     *oldestMultiXactId = MultiXactState->oldestMultiXactId;
     nextMultiXactId = MultiXactState->nextMXact;
     *oldestOffset = MultiXactState->oldestOffset;
-   oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
     LWLockRelease(MultiXactGenLock);
  
-   if (!oldestOffsetKnown)
-   {
-       *members = 0;
-       *multixacts = 0;
-       *oldestMultiXactId = InvalidMultiXactId;
-       *oldestOffset = 0;
-       return false;
-   }
-
     *members = nextOffset - *oldestOffset;
     *multixacts = nextMultiXactId - *oldestMultiXactId;
-   return true;
  }
  
  /*
@@ -2790,26 +2517,27 @@ GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
   * vacuum_multixact_freeze_table_age work together to make sure we never have
   * too many multixacts; we hope that, at least under normal circumstances,
   * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM.  At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use.  We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value.  That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive.  This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns.  In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
+ * However, if the average multixact has many members, we might accumulate a
+ * large number of members, consuming disk space, while still using few enough
+ * multixids that the multixid limits fail to trigger relminmxid advancement
+ * by VACUUM.
+ *
+ * To prevent that, if the members space usage exceeds a threshold
+ * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
+ * autovacuum_multixact_freeze_max_age to a value just less than the number of
+ * multixacts in use.  We hope that this will quickly trigger autovacuuming on
+ * the table or tables with the oldest relminmxid, thus allowing datminmxid
+ * values to advance and removing some members.
+ *
+ * As the amount of the member space in use grows, we become more aggressive
+ * in clamping this value.  That not only causes autovacuum to ramp up, but
+ * also makes any manual vacuums the user issues more aggressive.  This
+ * happens because vacuum_get_cutoffs() will clamp the freeze table and the
+ * minimum freeze age cutoffs based on the effective
+ * autovacuum_multixact_freeze_max_age this function returns.  At the extreme,
+ * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
+ * freeze_max_age to zero, and every vacuum of any table will freeze every
+ * multixact.
   */
  int
  MultiXactMemberFreezeThreshold(void)
@@ -2822,26 +2550,33 @@ MultiXactMemberFreezeThreshold(void)
     MultiXactId oldestMultiXactId;
     MultiXactOffset oldestOffset;
  
-   /* If we can't determine member space utilization, assume the worst. */
-   if (!GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset))
-       return 0;
+   /* Read the current offsets and members usage. */
+   GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset);
  
     /* If member space utilization is low, no special action is required. */
-   if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
+   if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
         return autovacuum_multixact_freeze_max_age;
  
     /*
      * Compute a target for relminmxid advancement.  The number of multixacts
      * we try to eliminate from the system is based on how far we are past
-    * MULTIXACT_MEMBER_SAFE_THRESHOLD.
+    * MULTIXACT_MEMBER_LOW_THRESHOLD.
+    *
+    * The way this formula works is that when members is exactly at the low
+    * threshold, fraction = 0.0, and we set freeze_max_age equal to
+    * mxid_age(oldestMultiXactId).  As members grows further, towards the
+    * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
+    * shrinks from mxid_age(oldestMultiXactId) to 0.  Beyond the high
+    * threshold, fraction > 1.0 and the result is clamped to 0.
      */
-   fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
-       (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
-   victim_multixacts = multixacts * fraction;
+   fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
+       (MULTIXACT_MEMBER_HIGH_THRESHOLD - MULTIXACT_MEMBER_LOW_THRESHOLD);
  
     /* fraction could be > 1.0, but lowest possible freeze age is zero */
-   if (victim_multixacts > multixacts)
+   if (fraction >= 1.0)
         return 0;
+
+   victim_multixacts = multixacts * fraction;
     result = multixacts - victim_multixacts;
  
     /*
@@ -2877,36 +2612,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
  
  /*
   * Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
   */
  static void
  PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
  {
-   const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
-   int64       startsegment = MXOffsetToMemberSegment(oldestOffset);
-   int64       endsegment = MXOffsetToMemberSegment(newOldestOffset);
-   int64       segment = startsegment;
-
-   /*
-    * Delete all the segments but the last one. The last segment can still
-    * contain, possibly partially, valid data.
-    */
-   while (segment != endsegment)
-   {
-       elog(DEBUG2, "truncating multixact members segment %" PRIx64,
-            segment);
-       SlruDeleteSegment(MultiXactMemberCtl, segment);
-
-       /* move to next segment, handling wraparound correctly */
-       if (segment == maxsegment)
-           segment = 0;
-       else
-           segment += 1;
-   }
+   SimpleLruTruncate(MultiXactMemberCtl,
+                     MXOffsetToMemberPage(newOldestOffset));
  }
  
  /*
@@ -3050,7 +2761,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
  
     elog(DEBUG1, "performing multixact truncation: "
          "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
-        "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+        "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
          oldestMulti, newOldestMulti,
          MultiXactIdToOffsetSegment(oldestMulti),
          MultiXactIdToOffsetSegment(newOldestMulti),
@@ -3091,6 +2802,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
     LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
     MultiXactState->oldestMultiXactId = newOldestMulti;
     MultiXactState->oldestMultiXactDB = newOldestMultiDB;
+   MultiXactState->oldestOffset = newOldestOffset;
     LWLockRelease(MultiXactGenLock);
  
     /* First truncate members */
@@ -3130,20 +2842,13 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
  
  /*
   * Decide whether a MultiXactMember page number is "older" for truncation
- * purposes.  There is no "invalid offset number" so use the numbers verbatim.
+ * purposes.  There is no "invalid offset number" and members never wrap
+ * around, so use the numbers verbatim.
   */
  static bool
  MultiXactMemberPagePrecedes(int64 page1, int64 page2)
  {
-   MultiXactOffset offset1;
-   MultiXactOffset offset2;
-
-   offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
-   offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
-
-   return (MultiXactOffsetPrecedes(offset1, offset2) &&
-           MultiXactOffsetPrecedes(offset1,
-                                   offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
+   return page1 < page2;
  }
  
  /*
@@ -3175,17 +2880,6 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
  }
  
  
-/*
- * Decide which of two offsets is earlier.
- */
-static bool
-MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
-{
-   int32       diff = (int32) (offset1 - offset2);
-
-   return (diff < 0);
-}
-
  /*
   * Write a TRUNCATE xlog record
   *
@@ -3278,7 +2972,7 @@ multixact_redo(XLogReaderState *record)
  
         elog(DEBUG1, "replaying multixact truncation: "
              "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
-            "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+            "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
              xlrec.startTruncOff, xlrec.endTruncOff,
              MultiXactIdToOffsetSegment(xlrec.startTruncOff),
              MultiXactIdToOffsetSegment(xlrec.endTruncOff),
@@ -3293,7 +2987,7 @@ multixact_redo(XLogReaderState *record)
          * Advance the horizon values, so they're current at the end of
          * recovery.
          */
-       SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
+       SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB);
  
         PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
  
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 89cbda9cc7c607b4e030f6fbf11711f811e5d7d6..6ced1d5728240f90a53d78705e86ee7c2a4c20f8 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5137,7 +5137,7 @@ BootStrapXLOG(uint32 data_checksum_version)
         FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
     checkPoint.nextOid = FirstGenbkiObjectId;
     checkPoint.nextMulti = FirstMultiXactId;
-   checkPoint.nextMultiOffset = 0;
+   checkPoint.nextMultiOffset = 1;
     checkPoint.oldestXid = FirstNormalTransactionId;
     checkPoint.oldestXidDB = Template1DbOid;
     checkPoint.oldestMulti = FirstMultiXactId;
@@ -5153,7 +5153,7 @@ BootStrapXLOG(uint32 data_checksum_version)
     MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
     AdvanceOldestClogXid(checkPoint.oldestXid);
     SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
-   SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
+   SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
     SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
  
     /* Set up the XLOG page header */
@@ -5632,7 +5632,7 @@ StartupXLOG(void)
     MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
     AdvanceOldestClogXid(checkPoint.oldestXid);
     SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
-   SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
+   SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
     SetCommitTsLimit(checkPoint.oldestCommitTsXid,
                      checkPoint.newestCommitTsXid);
  
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c

index 21b8f179ba0db932d356aa1a0298e99fa9ba8fbc..51dea342a4d14aed87ba2bd444e60c5375db697b 100644 (file)
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -886,7 +886,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
                              U64FromFullTransactionId(checkPoint.nextXid),
                              checkPoint.nextOid)));
     ereport(DEBUG1,
-           (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
+           (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %" PRIu64,
                              checkPoint.nextMulti, checkPoint.nextMultiOffset)));
     ereport(DEBUG1,
             (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 29def1e94faa200b4e934c7fdee3c6dac509ce7e..0528d1b6ecbedb8a6374e1f10e3264502b99bc00 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1147,8 +1147,8 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams params,
  
     /*
      * Also compute the multixact age for which freezing is urgent.  This is
-    * normally autovacuum_multixact_freeze_max_age, but may be less if we are
-    * short of multixact member space.
+    * normally autovacuum_multixact_freeze_max_age, but may be less if
+    * multixact members are bloated.
      */
     effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
  
@@ -1973,7 +1973,7 @@ vac_truncate_clog(TransactionId frozenXID,
      * signaling twice?
      */
     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
-   SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+   SetMultiXactIdLimit(minMulti, minmulti_datoid);
  
     LWLockRelease(WrapLimitsVacuumLock);
  }
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c

index 1c38488f2cbba350f835c2d2a6aebeb9c8c51c9d..f4830f896f33cecf5c379f4de83c5492b2907025 100644 (file)
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1936,8 +1936,8 @@ do_autovacuum(void)
  
     /*
      * Compute the multixact age for which freezing is urgent.  This is
-    * normally autovacuum_multixact_freeze_max_age, but may be less if we are
-    * short of multixact member space.
+    * normally autovacuum_multixact_freeze_max_age, but may be less if
+    * multixact members are bloated.
      */
     effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
  
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c

index 30ad46912e1808d3dfd195f83720abdc41e9e4e2..a4060309ae0e43563c2fe3c715cc7c275a3b9d1d 100644 (file)
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -271,7 +271,7 @@ main(int argc, char *argv[])
            ControlFile->checkPointCopy.nextOid);
     printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
            ControlFile->checkPointCopy.nextMulti);
-   printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
+   printf(_("Latest checkpoint's NextMultiOffset:  %" PRIu64 "\n"),
            ControlFile->checkPointCopy.nextMultiOffset);
     printf(_("Latest checkpoint's oldestXID:        %u\n"),
            ControlFile->checkPointCopy.oldestXid);
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c

index 07c95f9ab808bcc56cb57d5299f7dfdc4737c8a0..56012d5f4c461597ecbc9b6bdb783fa6cd8512c4 100644 (file)
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -115,6 +115,7 @@ static void KillExistingWALSummaries(void);
  static void WriteEmptyXLOG(void);
  static void usage(void);
  static uint32 strtouint32_strict(const char *restrict s, char **restrict endptr, int base);
+static uint64 strtouint64_strict(const char *restrict s, char **restrict endptr, int base);
  
  
  int
@@ -293,7 +294,7 @@ main(int argc, char *argv[])
  
             case 'O':
                 errno = 0;
-               next_mxoff_val = strtouint32_strict(optarg, &endptr, 0);
+               next_mxoff_val = strtouint64_strict(optarg, &endptr, 0);
                 if (endptr == optarg || *endptr != '\0' || errno != 0)
                 {
                     pg_log_error("invalid argument for option %s", "-O");
@@ -772,7 +773,7 @@ PrintControlValues(bool guessed)
            ControlFile.checkPointCopy.nextOid);
     printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
            ControlFile.checkPointCopy.nextMulti);
-   printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
+   printf(_("Latest checkpoint's NextMultiOffset:  %" PRIu64 "\n"),
            ControlFile.checkPointCopy.nextMultiOffset);
     printf(_("Latest checkpoint's oldestXID:        %u\n"),
            ControlFile.checkPointCopy.oldestXid);
@@ -848,7 +849,7 @@ PrintNewControlValues(void)
  
     if (next_mxoff_given)
     {
-       printf(_("NextMultiOffset:                      %u\n"),
+       printf(_("NextMultiOffset:                      %" PRIu64 "\n"),
                ControlFile.checkPointCopy.nextMultiOffset);
     }
  
@@ -1276,3 +1277,34 @@ strtouint32_strict(const char *restrict s, char **restrict endptr, int base)
  
     return (uint32) val;
  }
+
+/*
+ * strtouint64_strict -- strtou64() variant that rejects negative input
+ */
+static uint64
+strtouint64_strict(const char *restrict s, char **restrict endptr, int base)
+{
+   uint64      parsed;
+   bool        negative;
+
+   /* advance past any leading whitespace so the sign can be inspected */
+   for (; isspace((unsigned char) *s); s++)
+       ;
+
+   /*
+    * Remember whether a minus sign follows.  strtou64() is still invoked in
+    * that case so 'endptr' gets set (current callers don't rely on it).
+    */
+   negative = (*s == '-');
+
+   parsed = strtou64(s, endptr, base);
+
+   /* a successfully parsed negative number is reported as out of range */
+   if (negative && errno == 0)
+   {
+       errno = ERANGE;
+       return 0;
+   }
+
+   return parsed;
+}
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl

index 8717b144bc0436d85a0490083ac3e77923e0a8d7..8bab9add74f475e1c50853e3931363fa1508210b 100644 (file)
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -237,7 +237,7 @@ push @cmd,
    sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
  
  @files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
  # --multixact-ids argument is "new,old"
  push @cmd,
    '--multixact-ids' => sprintf("%d,%d",
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile

index 69fcf593caec9c117abe6107662ee59983f3b761..726df4b7525f6d435d7ce666b6df45144fc1e861 100644 (file)
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -18,11 +18,14 @@ OBJS = \
     file.o \
     function.o \
     info.o \
+   multixact_read_v18.o \
+   multixact_rewrite.o \
     option.o \
     parallel.o \
     pg_upgrade.o \
     relfilenumber.o \
     server.o \
+   slru_io.o \
     tablespace.o \
     task.o \
     util.o \
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build

index ac992f0d14b1da0161fb1faf430e9c8d323fc760..41f1126206bda682fd2e6db8b5864834939eda75 100644 (file)
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -8,11 +8,14 @@ pg_upgrade_sources = files(
    'file.c',
    'function.c',
    'info.c',
+  'multixact_read_v18.c',
+  'multixact_rewrite.c',
    'option.c',
    'parallel.c',
    'pg_upgrade.c',
    'relfilenumber.c',
    'server.c',
+  'slru_io.c',
    'tablespace.c',
    'task.c',
    'util.c',
@@ -47,6 +50,7 @@ tests += {
        't/004_subscription.pl',
        't/005_char_signedness.pl',
        't/006_transfer_modes.pl',
+      't/007_multixact_conversion.pl',
      ],
      'test_kwargs': {'priority': 40}, # pg_upgrade tests are slow
    },
diff --git a/src/bin/pg_upgrade/multixact_read_v18.c b/src/bin/pg_upgrade/multixact_read_v18.c

new file mode 100644 (file)

index 0000000..e7496a7
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_read_v18.c
@@ -0,0 +1,340 @@
+/*
+ * multixact_read_v18.c
+ *
+ * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
+ * and older.  In version 19, the multixid offsets were expanded from 32 to 64
+ * bits.
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_read_v18.c
+ */
+
+#include "postgres_fe.h"
+
+#include "multixact_read_v18.h"
+#include "pg_upgrade.h"
+
+/*
+ * NOTE: below are a bunch of definitions that are copy-pasted from
+ * multixact.c from version 18.  It's important that this file doesn't
+ * #include the new definitions with same names from "multixact_internal.h"!
+ *
+ * To further avoid confusion in the functions exposed outside this source
+ * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
+ * offsets.  The new 64-bit MultiXactOffset should not be used anywhere in
+ * this file.
+ */
+#ifdef MULTIXACT_INTERNAL_H
+#error multixact_internal.h should not be included in multixact_read_v18.c
+#endif
+#define MultiXactOffset should_not_be_used
+
+/* We need four bytes per offset in the old (32-bit) format. */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
+
+/* Page of the old-format offsets SLRU on which the entry for 'multi' lives */
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+   int64       pageno = multi / MULTIXACT_OFFSETS_PER_PAGE;
+
+   return pageno;
+}
+
+/* Index of the entry for 'multi' within its old-format offsets page */
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+   int         entryno = multi % MULTIXACT_OFFSETS_PER_PAGE;
+
+   return entryno;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId.  To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids.  Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages.  Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page.  This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT         8
+#define MXACT_MEMBER_FLAGS_PER_BYTE            1
+#define MXACT_MEMBER_XACT_BITMASK  ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP      4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP  \
+   (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+   (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+   (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* Members SLRU page on which the member at 'offset' is stored */
+static inline int64
+MXOffsetToMemberPage(MultiXactOffset32 offset)
+{
+   int64       pageno = offset / MULTIXACT_MEMBERS_PER_PAGE;
+
+   return pageno;
+}
+
+/*
+ * Byte position, within its members page, of the flags word of the group
+ * that contains the given member.
+ */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset32 offset)
+{
+   MultiXactOffset32 groupno = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+   int         group_in_page = groupno % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+
+   /* groups are laid out back to back from the start of the page */
+   return group_in_page * MULTIXACT_MEMBERGROUP_SIZE;
+}
+
+/*
+ * Byte position, within its members page, of the TransactionId slot of the
+ * given member: the group's flag bytes come first, followed by its xids.
+ */
+static inline int
+MXOffsetToMemberOffset(MultiXactOffset32 offset)
+{
+   int         group_start = MXOffsetToFlagsOffset(offset);
+   int         slot = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+   return group_start + MULTIXACT_FLAGBYTES_PER_GROUP +
+       slot * sizeof(TransactionId);
+}
+
+/*
+ * Number of bits to shift a group's flags word right so that the given
+ * member's flag bits end up in the low-order position.
+ */
+static inline int
+MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
+{
+   int         slot = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+   return slot * MXACT_MEMBER_BITS_PER_XACT;
+}
+
+/*
+ * Construct reader of old multixacts.
+ *
+ * pgdata is the directory under which pg_multixact/offsets and
+ * pg_multixact/members are looked up.  nextMulti and nextOffset are stored in
+ * the reader; GetOldMultiXactIdSingleMember() uses them to find the end of
+ * the latest multixact.
+ *
+ * Returns the pg_malloc'd reader that is passed to all the other calls in
+ * this module.  Release it with FreeOldMultiXactReader().
+ */
+OldMultiXactReader *
+AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
+                     MultiXactOffset32 nextOffset)
+{
+   OldMultiXactReader *state = pg_malloc(sizeof(*state));
+   char        dir[MAXPGPATH] = {0};
+
+   state->nextMXact = nextMulti;
+   state->nextOffset = nextOffset;
+
+   /* Bound the formatting so an over-long pgdata cannot overrun 'dir'. */
+   pg_snprintf(dir, sizeof(dir), "%s/pg_multixact/offsets", pgdata);
+   state->offset = AllocSlruRead(dir, false);
+
+   pg_snprintf(dir, sizeof(dir), "%s/pg_multixact/members", pgdata);
+   state->members = AllocSlruRead(dir, false);
+
+   return state;
+}
+
+/*
+ * This is a simplified version of the GetMultiXactIdMembers() server
+ * function:
+ *
+ * - Only return the updating member, if any.  Upgrade only cares about the
+ *   updaters.  If there is no updating member, return somewhat arbitrarily
+ *   the first locking-only member, because we don't have any way to represent
+ *   "no members".
+ *
+ * - Because there's no concurrent activity, we don't need to worry about
+ *   locking and some corner cases.
+ *
+ * - Don't bail out on invalid entries.  If the server crashes, it can leave
+ *   invalid or half-written entries on disk.  Such multixids won't appear
+ *   anywhere else on disk, so the server will never try to read them.  During
+ *   upgrade, however, we scan through all multixids in order, and will
+ *   encounter such invalid but unreferenced multixids too.
+ *
+ * 'state' is the reader returned by AllocOldMultiXactRead() and 'multi' the
+ * multixid to look up.  On success, the chosen member's xid and status are
+ * stored in *member; if the member range is empty, *member receives
+ * InvalidTransactionId with status 0.
+ *
+ * Returns true on success, false if the multixact was invalid (its own
+ * offset, or the offset marking its end, is zero).  Finding more than one
+ * updating member is reported with pg_fatal().
+ */
+bool
+GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
+                             MultiXactMember *member)
+{
+   MultiXactId nextMXact,
+               tmpMXact;
+   MultiXactOffset32 nextOffset;   /* an old-style offset, not a multixid */
+   int64       pageno,
+               prev_pageno;
+   int         entryno,
+               length;
+   char       *buf;
+   MultiXactOffset32 *offptr,
+               offset;
+   MultiXactOffset32 nextMXOffset;
+   TransactionId result_xid = InvalidTransactionId;
+   MultiXactStatus result_status = 0;
+
+   nextMXact = state->nextMXact;
+   nextOffset = state->nextOffset;
+
+   /*
+    * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
+    * multixact.c:
+    *
+    * Find out the offset at which we need to start reading MultiXactMembers
+    * and the number of members in the multixact.  We determine the latter as
+    * the difference between this multixact's starting offset and the next
+    * one's.  However, there are some corner cases to worry about:
+    *
+    * 1. This multixact may be the latest one created, in which case there is
+    * no next one to look at.  The next multixact's offset should be set
+    * already, as we set it in RecordNewMultiXact(), but we used to not do
+    * that in older minor versions.  To cope with that case, if this
+    * multixact is the latest one created, use the nextOffset value we read
+    * above as the endpoint.
+    *
+    * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
+    * for to mean "unset", there is an ambiguity near the point of offset
+    * wraparound.  If we see next multixact's offset is one, is that our
+    * multixact's actual endpoint, or did it end at zero with a subsequent
+    * increment?  We handle this using the knowledge that if the zero'th
+    * member slot wasn't filled, it'll contain zero, and zero isn't a valid
+    * transaction ID so it can't be a multixact member.  Therefore, if we
+    * read a zero from the members array, just ignore it.
+    */
+
+   /* Look up this multixact's starting offset in the offsets SLRU. */
+   pageno = MultiXactIdToOffsetPage(multi);
+   entryno = MultiXactIdToOffsetEntry(multi);
+
+   buf = SlruReadSwitchPage(state->offset, pageno);
+   offptr = (MultiXactOffset32 *) buf;
+   offptr += entryno;
+   offset = *offptr;
+
+   if (offset == 0)
+   {
+       /* Invalid entry */
+       return false;
+   }
+
+   /*
+    * Use the same increment rule as GetNewMultiXactId(), that is, don't
+    * handle wraparound explicitly until needed.
+    */
+   tmpMXact = multi + 1;
+
+   if (nextMXact == tmpMXact)
+   {
+       /* Corner case 1: there is no next multixact */
+       nextMXOffset = nextOffset;
+   }
+   else
+   {
+       /* handle wraparound if needed */
+       if (tmpMXact < FirstMultiXactId)
+           tmpMXact = FirstMultiXactId;
+
+       prev_pageno = pageno;
+
+       pageno = MultiXactIdToOffsetPage(tmpMXact);
+       entryno = MultiXactIdToOffsetEntry(tmpMXact);
+
+       /* 'buf' still holds the right page unless the next entry is elsewhere */
+       if (pageno != prev_pageno)
+           buf = SlruReadSwitchPage(state->offset, pageno);
+
+       offptr = (MultiXactOffset32 *) buf;
+       offptr += entryno;
+       nextMXOffset = *offptr;
+   }
+
+   if (nextMXOffset == 0)
+   {
+       /* Invalid entry */
+       return false;
+   }
+
+   /* member count is the distance between the two 32-bit offsets */
+   length = nextMXOffset - offset;
+
+   /* read the members */
+   prev_pageno = -1;
+   for (int i = 0; i < length; i++, offset++)
+   {
+       TransactionId *xactptr;
+       uint32     *flagsptr;
+       int         flagsoff;
+       int         bshift;
+       int         memberoff;
+       MultiXactStatus status;
+
+       pageno = MXOffsetToMemberPage(offset);
+       memberoff = MXOffsetToMemberOffset(offset);
+
+       /* only switch pages when this member is on a different one */
+       if (pageno != prev_pageno)
+       {
+           buf = SlruReadSwitchPage(state->members, pageno);
+           prev_pageno = pageno;
+       }
+
+       xactptr = (TransactionId *) (buf + memberoff);
+       if (!TransactionIdIsValid(*xactptr))
+       {
+           /*
+            * Corner case 2: we are looking at unused slot zero
+            */
+           if (offset == 0)
+               continue;
+
+           /*
+            * Otherwise this is an invalid entry that should not be
+            * referenced from anywhere in the heap.  We could return 'false'
+            * here, but we prefer to continue reading the members and
+            * converting them the best we can, to preserve evidence in case
+            * this is corruption that should not happen.
+            */
+       }
+
+       /* extract this member's status from its group's flags word */
+       flagsoff = MXOffsetToFlagsOffset(offset);
+       bshift = MXOffsetToFlagsBitShift(offset);
+       flagsptr = (uint32 *) (buf + flagsoff);
+
+       status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
+
+       /*
+        * Remember the updating XID among the members, or first locking XID
+        * if no updating XID.
+        */
+       if (ISUPDATE_from_mxstatus(status))
+       {
+           /* sanity check */
+           if (ISUPDATE_from_mxstatus(result_status))
+           {
+               /*
+                * We don't expect to see more than one updating member, even
+                * if the server had crashed.
+                */
+               pg_fatal("multixact %u has more than one updating member",
+                        multi);
+           }
+           result_xid = *xactptr;
+           result_status = status;
+       }
+       else if (!TransactionIdIsValid(result_xid))
+       {
+           result_xid = *xactptr;
+           result_status = status;
+       }
+   }
+
+   member->xid = result_xid;
+   member->status = result_status;
+   return true;
+}
+
+/*
+ * Frees a reader created by AllocOldMultiXactRead(): both SLRU readers
+ * (offsets and members) are handed to FreeSlruRead(), then the reader struct
+ * itself is released.
+ */
+void
+FreeOldMultiXactReader(OldMultiXactReader *state)
+{
+   FreeSlruRead(state->offset);
+   FreeSlruRead(state->members);
+
+   /* the struct came from pg_malloc(), so pair it with pg_free() */
+   pg_free(state);
+}
diff --git a/src/bin/pg_upgrade/multixact_read_v18.h b/src/bin/pg_upgrade/multixact_read_v18.h

new file mode 100644 (file)

index 0000000..6ef485b
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_read_v18.h
@@ -0,0 +1,37 @@
+/*
+ * multixact_read_v18.h
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_read_v18.h
+ */
+#ifndef MULTIXACT_READ_V18_H
+#define MULTIXACT_READ_V18_H
+
+#include "access/multixact.h"
+#include "slru_io.h"
+
+/*
+ * MultiXactOffset changed from uint32 to uint64 between versions 18 and 19.
+ * MultiXactOffset32 is used to represent a 32-bit offset from the old
+ * cluster.
+ */
+typedef uint32 MultiXactOffset32;
+
+/*
+ * State for reading multixacts from an old cluster that still uses 32-bit
+ * member offsets.
+ *
+ * Obtained from AllocOldMultiXactRead() and released with
+ * FreeOldMultiXactReader(), which also frees both SLRU readers below.
+ */
+typedef struct OldMultiXactReader
+{
+   /*
+    * NOTE(review): presumably the old cluster's next multixid and next
+    * member offset, as passed to AllocOldMultiXactRead() -- confirm against
+    * the allocator.
+    */
+   MultiXactId nextMXact;
+   MultiXactOffset32 nextOffset;
+
+   /* SLRU readers; each is released with FreeSlruRead() */
+   SlruSegState *offset;
+   SlruSegState *members;
+} OldMultiXactReader;
+
+/*
+ * Create a reader for an old cluster's multixacts.  pg_upgrade passes the
+ * old cluster's data directory and the next-multixid / next-offset values
+ * from its control data.
+ */
+extern OldMultiXactReader *AllocOldMultiXactRead(char *pgdata,
+                                                MultiXactId nextMulti,
+                                                MultiXactOffset32 nextOffset);
+
+/*
+ * Fetch a single representative member of 'multi' into *member: the updating
+ * XID if there is one, otherwise the first locking XID.  The caller treats
+ * the boolean result as "this multixid is valid".
+ */
+extern bool GetOldMultiXactIdSingleMember(OldMultiXactReader *state,
+                                         MultiXactId multi,
+                                         MultiXactMember *member);
+
+/* Free the reader, including the SLRU readers it owns. */
+extern void FreeOldMultiXactReader(OldMultiXactReader *reader);
+
+#endif                         /* MULTIXACT_READ_V18_H */
diff --git a/src/bin/pg_upgrade/multixact_rewrite.c b/src/bin/pg_upgrade/multixact_rewrite.c

new file mode 100644 (file)

index 0000000..4e56922
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_rewrite.c
@@ -0,0 +1,191 @@
+/*
+ * multixact_rewrite.c
+ *
+ * Functions to convert multixact SLRUs from the pre-v19 format to the current
+ * format with 64-bit MultiXactOffsets.
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_rewrite.c
+ */
+
+#include "postgres_fe.h"
+
+#include "access/multixact_internal.h"
+#include "multixact_read_v18.h"
+#include "pg_upgrade.h"
+
+static void RecordMultiXactOffset(SlruSegState *offsets_writer, MultiXactId multi,
+                                 MultiXactOffset offset);
+static void RecordMultiXactMembers(SlruSegState *members_writer,
+                                  MultiXactOffset offset,
+                                  int nmembers, MultiXactMember *members);
+
+/*
+ * Convert pg_multixact/offsets and /members from the old pre-v19 format with
+ * 32-bit offsets to the current format.
+ *
+ * Multixids in the range [from_multi, to_multi) are read from the old
+ * cluster, and written in the new format.  An important edge case is that if
+ * from_multi == to_multi, this initializes the new pg_multixact files in the
+ * new format without trying to open any old files.  (We rely on that when
+ * upgrading from PostgreSQL version 9.2 or below.)
+ *
+ * Only one member is kept per multixid (see the comment in the loop), so each
+ * valid multixid consumes exactly one member offset in the new cluster.
+ *
+ * Returns the new nextOffset value; the caller should set it in the new
+ * control file.  The new members always start from offset 1, regardless of
+ * the offset range used in the old cluster.
+ */
+MultiXactOffset
+rewrite_multixacts(MultiXactId from_multi, MultiXactId to_multi)
+{
+   MultiXactOffset next_offset;
+   SlruSegState *offsets_writer;
+   SlruSegState *members_writer;
+   char        dir[MAXPGPATH] = {0};
+   bool        prev_multixid_valid = false;
+
+   /*
+    * The range of valid multi XIDs is unchanged by the conversion (they are
+    * referenced from the heap tables), but the members SLRU is rewritten to
+    * start from offset 1.
+    */
+   next_offset = 1;
+
+   /*
+    * Prepare to write the new SLRU files.  The directory names are built
+    * with a bounded snprintf() so that an overlong data directory path
+    * cannot overrun 'dir'.
+    */
+   snprintf(dir, sizeof(dir), "%s/pg_multixact/offsets", new_cluster.pgdata);
+   offsets_writer = AllocSlruWrite(dir, false);
+   SlruWriteSwitchPage(offsets_writer, MultiXactIdToOffsetPage(from_multi));
+
+   snprintf(dir, sizeof(dir), "%s/pg_multixact/members", new_cluster.pgdata);
+   members_writer = AllocSlruWrite(dir, true /* use long segment names */ );
+   SlruWriteSwitchPage(members_writer, MXOffsetToMemberPage(next_offset));
+
+   /*
+    * Convert old multixids, if needed, by reading them one-by-one from the
+    * old cluster.
+    */
+   if (to_multi != from_multi)
+   {
+       OldMultiXactReader *old_reader;
+
+       old_reader = AllocOldMultiXactRead(old_cluster.pgdata,
+                                          old_cluster.controldata.chkpnt_nxtmulti,
+                                          old_cluster.controldata.chkpnt_nxtmxoff);
+
+       for (MultiXactId multi = from_multi; multi != to_multi;)
+       {
+           MultiXactMember member;
+           bool        multixid_valid;
+
+           /*
+            * Read this multixid's members.
+            *
+            * Locking-only XIDs that may be part of multi-xids don't matter
+            * after upgrade, as there can be no transactions running across
+            * upgrade.  So as a small optimization, we only read one member
+            * from each multixid: the updating one, or if there was no
+            * update, arbitrarily the first locking xid.
+            */
+           multixid_valid = GetOldMultiXactIdSingleMember(old_reader, multi, &member);
+
+           /*
+            * Write the new offset to pg_multixact/offsets.
+            *
+            * Even if this multixid is invalid, we still need to write its
+            * offset if the *previous* multixid was valid.  That's because
+            * when reading a multixid, the number of members is calculated
+            * from the difference between the two offsets.
+            */
+           RecordMultiXactOffset(offsets_writer, multi,
+                                 (multixid_valid || prev_multixid_valid) ? next_offset : 0);
+
+           /* Write the members */
+           if (multixid_valid)
+           {
+               RecordMultiXactMembers(members_writer, next_offset, 1, &member);
+               next_offset += 1;
+           }
+
+           /* Advance to next multixid, handling wraparound */
+           multi++;
+           if (multi < FirstMultiXactId)
+               multi = FirstMultiXactId;
+           prev_multixid_valid = multixid_valid;
+       }
+
+       FreeOldMultiXactReader(old_reader);
+   }
+
+   /* Write the final 'next' offset to the last SLRU page */
+   RecordMultiXactOffset(offsets_writer, to_multi,
+                         prev_multixid_valid ? next_offset : 0);
+
+   /* Flush the last SLRU pages */
+   FreeSlruWrite(offsets_writer);
+   FreeSlruWrite(members_writer);
+
+   return next_offset;
+}
+
+
+/*
+ * Store 'offset' as the offsets-SLRU entry of multixid 'multi'.
+ *
+ * The writer is switched to the page holding the entry, and the entry is
+ * then set in the writer's page buffer.
+ */
+static void
+RecordMultiXactOffset(SlruSegState *offsets_writer, MultiXactId multi,
+                     MultiXactOffset offset)
+{
+   int64       target_page = MultiXactIdToOffsetPage(multi);
+   int         slot = MultiXactIdToOffsetEntry(multi);
+   MultiXactOffset *entries;
+
+   entries = (MultiXactOffset *) SlruWriteSwitchPage(offsets_writer, target_page);
+   *(entries + slot) = offset;
+}
+
+/*
+ * Write the members for one multixid in the members SLRU
+ *
+ * 'offset' is the member offset of the first member; the 'nmembers' entries
+ * of 'members' are stored at consecutive offsets starting there.  For each
+ * member, the XID is stored in its member slot and the status is stored in
+ * the member's bit field within the corresponding flags word.
+ *
+ * (Currently, this is only ever called with nmembers == 1)
+ */
+static void
+RecordMultiXactMembers(SlruSegState *members_writer,
+                      MultiXactOffset offset,
+                      int nmembers, MultiXactMember *members)
+{
+   for (int i = 0; i < nmembers; i++, offset++)
+   {
+       int64       pageno;
+       char       *buf;
+       TransactionId *memberptr;
+       uint32     *flagsptr;
+       uint32      flagsval;
+       int         bshift;
+       int         flagsoff;
+       int         memberoff;
+
+       Assert(members[i].status <= MultiXactStatusUpdate);
+
+       pageno = MXOffsetToMemberPage(offset);
+       memberoff = MXOffsetToMemberOffset(offset);
+       flagsoff = MXOffsetToFlagsOffset(offset);
+       bshift = MXOffsetToFlagsBitShift(offset);
+
+       buf = SlruWriteSwitchPage(members_writer, pageno);
+
+       memberptr = (TransactionId *) (buf + memberoff);
+
+       *memberptr = members[i].xid;
+
+       flagsptr = (uint32 *) (buf + flagsoff);
+
+       /*
+        * Replace this member's bit field in the flags word.  The mask and
+        * the status are shifted as uint32, so the shifts stay well-defined
+        * whichever bit position 'bshift' selects within the 32-bit word; a
+        * signed int shift into the sign bit would be undefined.
+        */
+       flagsval = *flagsptr;
+       flagsval &= ~((((uint32) 1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+       flagsval |= ((uint32) members[i].status << bshift);
+       *flagsptr = flagsval;
+   }
+}
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c

index 490e98fa26f2adcd469df0bdcbf26932711ae526..47119222655edfc2c521bc7660a1f11dca7197ca 100644 (file)
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -43,6 +43,7 @@
  
  #include <time.h>
  
+#include "access/multixact.h"
  #include "catalog/pg_class_d.h"
  #include "common/file_perm.h"
  #include "common/logging.h"
@@ -807,15 +808,15 @@ copy_xact_xlog_xid(void)
               new_cluster.pgdata);
     check_ok();
  
-   /*
-    * If the old server is before the MULTIXACT_FORMATCHANGE_CAT_VER change
-    * (see pg_upgrade.h) and the new server is after, then we don't copy
-    * pg_multixact files, but we need to reset pg_control so that the new
-    * server doesn't attempt to read multis older than the cutoff value.
-    */
-   if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
-       new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+   /* Copy or convert pg_multixact files */
+   Assert(new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER);
+   Assert(new_cluster.controldata.cat_ver >= MULTIXACTOFFSET_FORMATCHANGE_CAT_VER);
+   if (old_cluster.controldata.cat_ver >= MULTIXACTOFFSET_FORMATCHANGE_CAT_VER)
     {
+       /* No change in multixact format, just copy the files */
+       MultiXactId new_nxtmulti = old_cluster.controldata.chkpnt_nxtmulti;
+       MultiXactOffset new_nxtmxoff = old_cluster.controldata.chkpnt_nxtmxoff;
+
         copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
         copy_subdir_files("pg_multixact/members", "pg_multixact/members");
  
@@ -826,38 +827,67 @@ copy_xact_xlog_xid(void)
          * counters here and the oldest multi present on system.
          */
         exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-                 "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
-                 new_cluster.bindir,
-                 old_cluster.controldata.chkpnt_nxtmxoff,
-                 old_cluster.controldata.chkpnt_nxtmulti,
+                 "\"%s/pg_resetwal\" -O %" PRIu64 " -m %u,%u \"%s\"",
+                 new_cluster.bindir, new_nxtmxoff, new_nxtmulti,
                   old_cluster.controldata.chkpnt_oldstMulti,
                   new_cluster.pgdata);
         check_ok();
     }
-   else if (new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+   else
     {
+       /* Conversion is needed */
+       MultiXactId nxtmulti;
+       MultiXactId oldstMulti;
+       MultiXactOffset nxtmxoff;
+
         /*
-        * Remove offsets/0000 file created by initdb that no longer matches
-        * the new multi-xid value.  "members" starts at zero so no need to
-        * remove it.
+        * Determine the range of multixacts to convert.
          */
-       remove_new_subdir("pg_multixact/offsets", false);
+       nxtmulti = old_cluster.controldata.chkpnt_nxtmulti;
+       if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+       {
+           /* Versions 9.3 - 18: convert all multixids  */
+           oldstMulti = old_cluster.controldata.chkpnt_oldstMulti;
+       }
+       else
+       {
+           /*
+            * In PostgreSQL 9.2 and below, multitransactions were only used
+            * for row locking, and as such don't need to be preserved during
+            * upgrade.  In that case, we utilize rewrite_multixacts() just to
+            * initialize new, empty files in the new format.
+            *
+            * It's important that the oldest multi is set to the latest value
+            * used by the old system, so that multixact.c returns the empty
+            * set for multis that might be present on disk.
+            */
+           oldstMulti = nxtmulti;
+       }
+       /* handle wraparound */
+       if (nxtmulti < FirstMultiXactId)
+           nxtmulti = FirstMultiXactId;
+       if (oldstMulti < FirstMultiXactId)
+           oldstMulti = FirstMultiXactId;
  
-       prep_status("Setting oldest multixact ID in new cluster");
+       /*
+        * Remove the files created by initdb in the new cluster.
+        * rewrite_multixacts() will create new ones.
+        */
+       remove_new_subdir("pg_multixact/members", false);
+       remove_new_subdir("pg_multixact/offsets", false);
  
         /*
-        * We don't preserve files in this case, but it's important that the
-        * oldest multi is set to the latest value used by the old system, so
-        * that multixact.c returns the empty set for multis that might be
-        * present on disk.  We set next multi to the value following that; it
-        * might end up wrapped around (i.e. 0) if the old cluster had
-        * next=MaxMultiXactId, but multixact.c can cope with that just fine.
+        * Create new pg_multixact files, converting old ones if needed.
          */
+       prep_status("Converting pg_multixact files");
+       nxtmxoff = rewrite_multixacts(oldstMulti, nxtmulti);
+       check_ok();
+
+       prep_status("Setting next multixact ID and offset for new cluster");
         exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-                 "\"%s/pg_resetwal\" -m %u,%u \"%s\"",
+                 "\"%s/pg_resetwal\" -O %" PRIu64 " -m %u,%u \"%s\"",
                   new_cluster.bindir,
-                 old_cluster.controldata.chkpnt_nxtmulti + 1,
-                 old_cluster.controldata.chkpnt_nxtmulti,
+                 nxtmxoff, nxtmulti, oldstMulti,
                   new_cluster.pgdata);
         check_ok();
     }
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h

index e86336f4be953b1acd3850344bb82f59a4887a09..be30dceed5cbebcacd04e69c0a4ee570e17630c4 100644 (file)
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -114,6 +114,13 @@ extern char *output_files[];
   */
  #define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
  
+/*
+ * MultiXactOffset was changed from 32-bit to 64-bit in version 19, at this
+ * catalog version.  pg_multixact files need to be converted when upgrading
+ * across this version.
+ */
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202512091
+
  /*
   * large object chunk size added to pg_controldata,
   * commit 5f93c37805e7485488480916b4585e098d3cc883
@@ -235,7 +242,7 @@ typedef struct
     uint32      chkpnt_nxtepoch;
     uint32      chkpnt_nxtoid;
     uint32      chkpnt_nxtmulti;
-   uint32      chkpnt_nxtmxoff;
+   uint64      chkpnt_nxtmxoff;
     uint32      chkpnt_oldstMulti;
     uint32      chkpnt_oldstxid;
     uint32      align;
@@ -499,6 +506,9 @@ void        old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
  
  void       report_extension_updates(ClusterInfo *cluster);
  
+/* multixact_rewrite.c */
+MultiXactOffset rewrite_multixacts(MultiXactId from_multi, MultiXactId to_multi);
+
  /* parallel.c */
  void       parallel_exec_prog(const char *log_file, const char *opt_log_file,
                                const char *fmt,...) pg_attribute_printf(3, 4);
diff --git a/src/bin/pg_upgrade/slru_io.c b/src/bin/pg_upgrade/slru_io.c

new file mode 100644 (file)

index 0000000..812a241
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.c
@@ -0,0 +1,268 @@
+/*
+ * slru_io.c
+ *
+ * Routines for reading and writing SLRU files during upgrade.
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.c
+ */
+
+#include "postgres_fe.h"
+
+#include <fcntl.h>
+
+#include "common/fe_memutils.h"
+#include "common/file_perm.h"
+#include "common/file_utils.h"
+#include "pg_upgrade.h"
+#include "port/pg_iovec.h"
+#include "slru_io.h"
+
+static SlruSegState *AllocSlruSegState(const char *dir);
+static char *SlruFileName(SlruSegState *state, int64 segno);
+static void SlruFlush(SlruSegState *state);
+
+/*
+ * Allocate an SlruSegState for 'dir' with no segment open yet.  This is the
+ * part shared by AllocSlruRead() and AllocSlruWrite().
+ */
+static SlruSegState *
+AllocSlruSegState(const char *dir)
+{
+   SlruSegState *seg;
+
+   seg = pg_malloc(sizeof(SlruSegState));
+
+   /* -1 in 'fd' and 'segno' marks that no segment file is open */
+   seg->fd = -1;
+   seg->segno = -1;
+   seg->pageno = 0;
+   seg->fn = NULL;
+   seg->dir = pstrdup(dir);
+
+   /* seg->writing and seg->long_segment_names must be set by caller! */
+
+   return seg;
+}
+
+/*
+ * Build the path of segment 'segno' inside state->dir, using either the
+ * short (4 hex digit) or long (15 hex digit) file name format.  Similar to
+ * the backend function with the same name.  The result is allocated by
+ * psprintf().
+ */
+static char *
+SlruFileName(SlruSegState *state, int64 segno)
+{
+   if (!state->long_segment_names)
+   {
+       Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
+       return psprintf("%s/%04X", state->dir, (unsigned int) segno);
+   }
+
+   Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
+   return psprintf("%s/%015" PRIX64, state->dir, segno);
+}
+
+/*
+ * Create an SLRU reader for the files in 'dir'.
+ *
+ * 'long_segment_names' selects the segment file name format.  No file is
+ * opened until a page is requested.
+ */
+SlruSegState *
+AllocSlruRead(const char *dir, bool long_segment_names)
+{
+   SlruSegState *reader;
+
+   reader = AllocSlruSegState(dir);
+   reader->long_segment_names = long_segment_names;
+   reader->writing = false;
+
+   return reader;
+}
+
+/*
+ * Read the given page into memory buffer.
+ *
+ * Reading can be done in random order.
+ *
+ * If the file containing 'pageno' does not exist, a fatal error is raised.
+ * If the file exists but is shorter than expected, the missing part is read
+ * as zeros and a warning is logged.  That is reasonable behavior for current
+ * callers.
+ *
+ * Returns a pointer to the reader's one-page buffer, which is overwritten by
+ * the next page switch.
+ *
+ * This is the slow path of the inlineable SlruReadSwitchPage() function.
+ */
+char *
+SlruReadSwitchPageSlow(SlruSegState *state, uint64 pageno)
+{
+   int64       segno;
+   off_t       offset;
+   ssize_t     bytes_read;
+
+   Assert(!state->writing);    /* read only mode */
+
+   if (state->segno != -1 && pageno == state->pageno)
+       return state->buf.data;
+
+   /* If the new page is on a different SLRU segment, open the new segment */
+   segno = pageno / SLRU_PAGES_PER_SEGMENT;
+   if (segno != state->segno)
+   {
+       if (state->segno != -1)
+       {
+           close(state->fd);
+           state->fd = -1;
+
+           pg_free(state->fn);
+           state->fn = NULL;
+
+           state->segno = -1;
+       }
+
+       state->fn = SlruFileName(state, segno);
+       if ((state->fd = open(state->fn, O_RDONLY | PG_BINARY, 0)) < 0)
+           pg_fatal("could not open file \"%s\": %m", state->fn);
+       state->segno = segno;
+   }
+
+   offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+   bytes_read = 0;
+   while (bytes_read < BLCKSZ)
+   {
+       ssize_t     rc;
+
+       /*
+        * Note: the destination must be computed from the char array itself
+        * (state->buf.data), not from its address.  "&state->buf.data + n"
+        * is pointer-to-array arithmetic and would step n whole blocks past
+        * the buffer after a short read.
+        */
+       rc = pg_pread(state->fd,
+                     state->buf.data + bytes_read,
+                     BLCKSZ - bytes_read,
+                     offset + bytes_read);
+       if (rc < 0)
+       {
+           if (errno == EINTR)
+               continue;
+           pg_fatal("could not read file \"%s\": %m", state->fn);
+       }
+       if (rc == 0)
+       {
+           /*
+            * unexpected EOF
+            *
+            * The position is cast to match the %zd conversion; an offset
+            * within one segment file is small enough to fit.
+            */
+           pg_log(PG_WARNING, "unexpected EOF reading file \"%s\" at offset %zd, reading as zeros", state->fn,
+                  (ssize_t) (offset + bytes_read));
+           memset(state->buf.data + bytes_read, 0, BLCKSZ - bytes_read);
+           break;
+       }
+       bytes_read += rc;
+   }
+   state->pageno = pageno;
+
+   return state->buf.data;
+}
+
+/*
+ * Free the reader.
+ *
+ * Closes the currently open segment file, if any, and releases the reader
+ * together with the strings it owns (the current file name and the copy of
+ * the directory name).
+ */
+void
+FreeSlruRead(SlruSegState *state)
+{
+   Assert(!state->writing);    /* read only mode */
+
+   if (state->fd != -1)
+       close(state->fd);
+
+   /* 'fn' is NULL if no segment was ever opened; pg_free() accepts that */
+   pg_free(state->fn);
+   pg_free(state->dir);
+   pg_free(state);
+}
+
+/*
+ * Create an SLRU writer for the files in 'dir'.
+ *
+ * 'long_segment_names' selects the segment file name format.  No file is
+ * created until a page is opened for writing.
+ */
+SlruSegState *
+AllocSlruWrite(const char *dir, bool long_segment_names)
+{
+   SlruSegState *writer;
+
+   writer = AllocSlruSegState(dir);
+   writer->long_segment_names = long_segment_names;
+   writer->writing = true;
+
+   return writer;
+}
+
+/*
+ * Make 'pageno' the page held in the writer's buffer.
+ *
+ * The page that was in the buffer so far (if any) is written out first, and
+ * the buffer is then zeroed for the new page.  When the new page lives in a
+ * different segment, the old segment file is closed and the new one is
+ * created; the part of the new file preceding the page is filled with zeros.
+ *
+ * NOTE: Segments are created with O_EXCL, so this assumes that each segment
+ * is written in full before moving on to the next one.  That restriction
+ * would be easy to lift if needed, but it fits the usage pattern of current
+ * callers.
+ *
+ * Returns a pointer to the writer's one-page buffer.
+ *
+ * This is the slow path of the inlineable SlruWriteSwitchPage() function.
+ */
+char *
+SlruWriteSwitchPageSlow(SlruSegState *state, uint64 pageno)
+{
+   int64       new_segno = pageno / SLRU_PAGES_PER_SEGMENT;
+   off_t       page_start = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+   Assert(state->writing);
+
+   /* Nothing to do if the requested page is already in the buffer */
+   if (state->segno != -1 && state->pageno == pageno)
+       return state->buf.data;
+
+   /* Write out the previous page before reusing the buffer */
+   SlruFlush(state);
+   memset(state->buf.data, 0, BLCKSZ);
+
+   if (state->segno != new_segno)
+   {
+       bool        had_segment = (state->segno != -1);
+
+       if (had_segment)
+       {
+           close(state->fd);
+           state->fd = -1;
+
+           pg_free(state->fn);
+           state->fn = NULL;
+
+           state->segno = -1;
+       }
+
+       /* Create the segment */
+       state->fn = SlruFileName(state, new_segno);
+       state->fd = open(state->fn, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                        pg_file_create_mode);
+       if (state->fd < 0)
+           pg_fatal("could not create file \"%s\": %m", state->fn);
+
+       state->segno = new_segno;
+
+       /* Zero-fill the file up to the start of the page */
+       if (page_start > 0 &&
+           pg_pwrite_zeros(state->fd, page_start, 0) < 0)
+           pg_fatal("could not write file \"%s\": %m", state->fn);
+   }
+
+   state->pageno = pageno;
+
+   return state->buf.data;
+}
+
+/*
+ * Write the buffered page to its position in the currently open segment
+ * file.  Does nothing if no segment is open.  A write failure is fatal.
+ */
+static void
+SlruFlush(SlruSegState *state)
+{
+   struct iovec page_iov;
+   off_t       page_start;
+
+   if (state->segno == -1)
+       return;
+
+   page_iov.iov_base = &state->buf;
+   page_iov.iov_len = BLCKSZ;
+
+   page_start = (state->pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+   if (pg_pwritev_with_retry(state->fd, &page_iov, 1, page_start) < 0)
+       pg_fatal("could not write file \"%s\": %m", state->fn);
+}
+
+/*
+ * Free the writer.
+ *
+ * The page still held in the buffer is written out first.  Then the open
+ * segment file, if any, is closed and the writer is released together with
+ * the strings it owns (the current file name and the copy of the directory
+ * name).
+ */
+void
+FreeSlruWrite(SlruSegState *state)
+{
+   Assert(state->writing);
+
+   SlruFlush(state);
+
+   if (state->fd != -1)
+       close(state->fd);
+
+   /* 'fn' is NULL if no segment was ever created; pg_free() accepts that */
+   pg_free(state->fn);
+   pg_free(state->dir);
+   pg_free(state);
+}
diff --git a/src/bin/pg_upgrade/slru_io.h b/src/bin/pg_upgrade/slru_io.h

new file mode 100644 (file)

index 0000000..5c80a67
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.h
@@ -0,0 +1,52 @@
+/*
+ * slru_io.h
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.h
+ */
+
+#ifndef SLRU_IO_H
+#define SLRU_IO_H
+
+/*
+ * State for reading or writing an SLRU, with a one page buffer.
+ *
+ * Create with AllocSlruRead() or AllocSlruWrite(), and release with the
+ * matching FreeSlruRead() or FreeSlruWrite().
+ */
+typedef struct SlruSegState
+{
+   bool        writing;        /* true for a writer, false for a reader */
+   bool        long_segment_names; /* selects the segment file name format */
+
+   char       *dir;            /* private copy of the directory path */
+   char       *fn;             /* name of the open segment file, or NULL */
+   int         fd;             /* descriptor of the open segment, or -1 */
+   int64       segno;          /* number of the open segment, or -1 */
+   uint64      pageno;         /* page held in 'buf'; valid if segno != -1 */
+
+   PGAlignedBlock buf;         /* the one-page buffer */
+} SlruSegState;
+
+extern SlruSegState *AllocSlruRead(const char *dir, bool long_segment_names);
+extern char *SlruReadSwitchPageSlow(SlruSegState *state, uint64 pageno);
+extern void FreeSlruRead(SlruSegState *state);
+
+/*
+ * Return the buffer holding page 'pageno', reading the page first unless it
+ * is the one already buffered.
+ */
+static inline char *
+SlruReadSwitchPage(SlruSegState *state, uint64 pageno)
+{
+   bool        page_is_current = (state->segno != -1 && state->pageno == pageno);
+
+   return page_is_current ? state->buf.data : SlruReadSwitchPageSlow(state, pageno);
+}
+
+extern SlruSegState *AllocSlruWrite(const char *dir, bool long_segment_names);
+extern char *SlruWriteSwitchPageSlow(SlruSegState *state, uint64 pageno);
+extern void FreeSlruWrite(SlruSegState *state);
+
+/*
+ * Return the buffer for writing page 'pageno', switching the writer to that
+ * page first unless it is the one already buffered.
+ */
+static inline char *
+SlruWriteSwitchPage(SlruSegState *state, uint64 pageno)
+{
+   bool        page_is_current = (state->segno != -1 && state->pageno == pageno);
+
+   return page_is_current ? state->buf.data : SlruWriteSwitchPageSlow(state, pageno);
+}
+
+#endif                         /* SLRU_IO_H */
diff --git a/src/bin/pg_upgrade/t/007_multixact_conversion.pl b/src/bin/pg_upgrade/t/007_multixact_conversion.pl

new file mode 100644 (file)

index 0000000..443b93c
--- /dev/null
+++ b/src/bin/pg_upgrade/t/007_multixact_conversion.pl
@@ -0,0 +1,427 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Version 19 expanded MultiXactOffset from 32 to 64 bits.  Upgrading
+# across that requires rewriting the SLRU files to the new format.
+# This file contains tests for the conversion.
+#
+# To run, set 'oldinstall' ENV variable to point to a pre-v19
+# installation.  If it's not set, or if it points to a v19 or above
+# installation, this still performs a very basic test, upgrading a
+# cluster with some multixacts.  It's not very interesting, however,
+# because there's no conversion involved in that case.
+
+use strict;
+use warnings FATAL => 'all';
+
+use Math::BigInt;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Temp dir for dumps.
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
+# A workload that consumes multixids.  The purpose of this is to
+# generate some multixids in the old cluster, so that we can test
+# upgrading them.  The workload is a mix of KEY SHARE locking queries
+# and UPDATEs, and commits and aborts, to generate a mix of multixids
+# with different statuses.  It consumes around 3000 multixids with
+# 30000 members.  That's enough to span more than one multixids
+# 'offsets' page, and more than one 'members' segment.
+#
+# The workload leaves behind a table called 'mxofftest' containing a
+# small number of rows referencing some of the generated multixids.
+#
+# Because this function is used to generate test data on the old
+# installation, it needs to work with older PostgreSQL server
+# versions.
+#
+# The first argument is the cluster to connect to, the second argument
+# is a cluster using the new version.  We need the 'psql' binary from
+# the new version, the new cluster is otherwise unused.  (We need to
+# use the new 'psql' because some of the more advanced background psql
+# perl module features depend on a fairly recent psql version.)
+#
+# The cluster is started at the beginning and stopped again at the end.
+sub mxact_workload
+{
+   my $node = shift;       # Cluster to connect to
+   my $binnode = shift;    # Use the psql binary from this cluster
+
+   my $connstr = $node->connstr('postgres');
+
+   $node->start;
+   $node->safe_psql(
+       'postgres', qq[
+       CREATE TABLE mxofftest (id INT PRIMARY KEY, n_updated INT)
+         WITH (AUTOVACUUM_ENABLED=FALSE);
+       INSERT INTO mxofftest SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;
+   ]);
+
+   my $nclients = 20;
+   my $update_every = 13;
+   my $abort_every = 11;
+   my @connections = ();
+
+   # Silence the logging of the statements we run to avoid
+   # unnecessarily bloating the test logs.  This runs before the
+   # upgrade we're testing, so the details should not be very
+   # interesting for debugging.  But if needed, you can make it more
+   # verbose by setting this.
+   my $verbose = 0;
+
+   # Open exactly $nclients connections to the database, matching the
+   # round-robin indexing below.  Start a transaction in each
+   # connection.
+   for (1 .. $nclients)
+   {
+       # Use the psql binary from the new installation.  The
+       # BackgroundPsql functionality doesn't work with older psql
+       # versions.
+       my $conn = $binnode->background_psql('', connstr => $connstr);
+
+       $conn->query_safe("SET log_statement=none", verbose => $verbose)
+         unless $verbose;
+       $conn->query_safe("SET enable_seqscan=off", verbose => $verbose);
+       $conn->query_safe("BEGIN", verbose => $verbose);
+
+       push(@connections, $conn);
+   }
+
+   # Run queries, cycling through the connections in a round-robin
+   # fashion.  We keep a transaction open in each connection at all
+   # times, and lock/update the rows.  Because the other connections
+   # hold their transactions open, each SELECT FOR KEY SHARE query
+   # generates a new multixid with several member XIDs.
+   for (my $i = 0; $i < 3000; $i++)
+   {
+       my $conn = $connections[ $i % $nclients ];
+
+       # End the connection's previous transaction, aborting every
+       # $abort_every'th one, and start a new one.
+       my $sql = ($i % $abort_every == 0) ? "ABORT" : "COMMIT";
+       $conn->query_safe($sql, verbose => $verbose);
+
+       $conn->query_safe("BEGIN", verbose => $verbose);
+       if ($i % $update_every == 0)
+       {
+           $sql = qq[
+             UPDATE mxofftest SET n_updated = n_updated + 1 WHERE id = ${i} % 50;
+           ];
+       }
+       else
+       {
+           # The threshold grows with $i, so later iterations lock
+           # fewer rows.
+           my $threshold = int($i / 3000 * 50);
+           $sql = qq[
+             select count(*) from (
+               SELECT * FROM mxofftest WHERE id >= $threshold FOR KEY SHARE
+             ) as x
+           ];
+       }
+       $conn->query_safe($sql, verbose => $verbose);
+   }
+
+   for my $conn (@connections)
+   {
+       $conn->quit();
+   }
+
+   $node->stop;
+   return;
+}
+
+# Dump the contents of the 'mxofftest' table (created by
+# mxact_workload), including ctid, xmin and xmax, into the file
+# $filename in the temp dir.  Returns the path of the file.
+sub get_test_table_contents
+{
+   my $node = shift;
+   my $filename = shift;
+
+   my $contents = $node->safe_psql('postgres',
+       "SELECT ctid, xmin, xmax, * FROM mxofftest");
+
+   my $path = "$tempdir/$filename";
+   open(my $fh, '>', $path)
+     or die "could not open $path for writing $!";
+   print {$fh} $contents;
+   close($fh);
+
+   return $path;
+}
+
+# Write the members of all updating multixids in the range [$from, $to)
+# to the file $filename in the temp dir, and return the path of the
+# file.
+#
+# Members with mode 'keysh' or 'sh' are filtered out; only the
+# remaining members are listed, as (multi, mode, xid) rows.
+#
+# If $to is smaller than $from, the range is taken to have wrapped
+# around, and it is queried in two parts.
+sub get_updating_multixact_members
+{
+   my ($node, $from, $to, $filename) = @_;
+
+   my $path = $tempdir . '/' . $filename;
+   open(my $fh, '>', $path)
+     || die "could not open $path for writing $!";
+
+   if ($to >= $from)
+   {
+       # No wraparound: a single query covers $from .. $to - 1.
+       my $res = $node->safe_psql(
+           'postgres', qq[
+           SELECT multi, mode, xid
+           FROM generate_series($from, $to - 1) as multi,
+                pg_get_multixact_members(multi::text::xid)
+           WHERE mode not in ('keysh', 'sh');
+       ]);
+       print $fh $res;
+   }
+   else
+   {
+       # Multixids wrapped around.  Split the query into two parts,
+       # before and after the wraparound.
+       my $res = $node->safe_psql(
+           'postgres', qq[
+           SELECT multi, mode, xid
+           FROM generate_series($from, 4294967295) as multi,
+                pg_get_multixact_members(multi::text::xid)
+           WHERE mode not in ('keysh', 'sh');
+       ]);
+       print $fh $res;
+       # The second part starts at 1, skipping multixid 0.
+       $res = $node->safe_psql(
+           'postgres', qq[
+           SELECT multi, mode, xid
+           FROM generate_series(1, $to - 1) as multi,
+                pg_get_multixact_members(multi::text::xid)
+           WHERE mode not in ('keysh', 'sh');
+       ]);
+       print $fh $res;
+   }
+
+   close($fh);
+   return $path;
+}
+
+# Read the multixid related fields from the control file by running
+# pg_controldata on the node's data directory.
+#
+# Returns a list: (oldestMultiXid, NextMultiXactId, NextMultiOffset).
+# Dies if any of the fields is missing from the output.
+#
+# Note: This is used on both the old and the new installation, so the
+# command arguments and the output parsing used here must work with
+# all PostgreSQL versions supported by the test.
+sub read_multixid_fields
+{
+   my ($node) = @_;
+
+   my ($stdout, $stderr) = run_command(
+       [ $node->installed_command('pg_controldata'), $node->data_dir ]);
+
+   my ($oldest_multi_xid) =
+     $stdout =~ /^Latest checkpoint's oldestMultiXid:\s*(.*)$/m
+     or die "could not read oldestMultiXid from pg_controldata";
+   my ($next_multi_xid) =
+     $stdout =~ /^Latest checkpoint's NextMultiXactId:\s*(.*)$/m
+     or die "could not read NextMultiXactId from pg_controldata";
+   my ($next_multi_offset) =
+     $stdout =~ /^Latest checkpoint's NextMultiOffset:\s*(.*)$/m
+     or die "could not read NextMultiOffset from pg_controldata";
+
+   return ($oldest_multi_xid, $next_multi_xid, $next_multi_offset);
+}
+
+# Reset a cluster's next multixid and mxoffset to given values.
+#
+# Note: This is used on the old installation, so the command arguments
+# and the output parsing used here must work with all pre-v19
+# PostgreSQL versions supported by the test.
+sub reset_mxid_mxoffset_pre_v19
+{
+   my $node = shift;
+   my $mxid = shift;
+   my $mxoffset = shift;
+
+   my $pg_resetwal_path = $node->installed_command('pg_resetwal');
+   # Get block size
+   my ($out, $err) =
+     run_command([ $pg_resetwal_path, '--dry-run', $node->data_dir ]);
+   $out =~ /^Database block size: *(\d+)$/m or die;
+
+   # Verify that no multixids are currently in use.  Resetting would
+   # destroy them.  (A freshly initialized cluster has no multixids.)
+   $out =~ /^Latest checkpoint's NextMultiXactId: *(\d+)$/m or die;
+   my $next_mxid = $1;
+   $out =~ /^Latest checkpoint's oldestMultiXid: *(\d+)$/m or die;
+   my $oldest_mxid = $1;
+   die "cluster has some multixids in use" unless $next_mxid == $oldest_mxid;
+
+   # Extract a few other values from pg_resetwal --dry-run output
+   # that we need for the calculations below
+   $out =~ /^Database block size: *(\d+)$/m or die;
+   my $blcksz = $1;
+   # SLRU_PAGES_PER_SEGMENT is always 32 on pre-19 versions
+   my $slru_pages_per_segment = 32;
+
+   # Do the reset
+   my @cmd = (
+       $pg_resetwal_path,
+       '--pgdata' => $node->data_dir,
+       '--multixact-offset' => $mxoffset,
+       '--multixact-ids' => "$mxid,$mxid");
+   command_ok(\@cmd, 'reset multixids and offset');
+
+   # pg_resetwal just updates the control file.  The cluster will
+   # refuse to start up, if the SLRU segments corresponding to the
+   # next multixid and offset does not exist.  Create a segments that
+   # covers the given values, filled with zeros.  But first remove
+   # any old segments.
+   unlink glob $node->data_dir . "/pg_multixact/offsets/*";
+   unlink glob $node->data_dir . "/pg_multixact/members/*";
+
+   # Initialize the 'offsets' SLRU file containing the new next multixid
+   # with zeros
+   #
+   # sizeof(MultiXactOffset) == 4 in PostgreSQL versions before 19
+   my $multixact_offsets_per_page = $blcksz / 4;
+   my $segno =
+     int($mxid / $multixact_offsets_per_page / $slru_pages_per_segment);
+   my $path =
+     sprintf('%s/pg_multixact/offsets/%04X', $node->data_dir, $segno);
+   open my $fh, ">", $path
+     or die "could not open \"$path\": $!";
+   binmode $fh;
+   my $bytes_per_seg = $slru_pages_per_segment * $blcksz;
+   syswrite($fh, "\0" x $bytes_per_seg) == $bytes_per_seg
+     or die "could not write to \"$path\": $!";
+   close $fh;
+
+   # Same for the 'members' SLRU
+   my $multixact_members_per_page = int($blcksz / 20) * 4;
+   $segno =
+     int($mxoffset / $multixact_members_per_page / $slru_pages_per_segment);
+   $path = sprintf "%s/pg_multixact/members/%04X", $node->data_dir, $segno;
+   open $fh, ">", $path
+     or die "could not open \"$path\": $!";
+   binmode $fh;
+   syswrite($fh, "\0" x $bytes_per_seg) == $bytes_per_seg
+     or die "could not write to \"$path\": $!";
+   close($fh);
+}
+
+# Main test workhorse routine.  Dump data on old version, run
+# pg_upgrade, compare data after upgrade.
+sub upgrade_and_compare
+{
+   my ($tag, $oldnode, $newnode) = @_;
+
+   # Upgrade the old cluster into the new one.
+   my @pg_upgrade_cmd = (
+       'pg_upgrade', '--no-sync',
+       '--old-datadir' => $oldnode->data_dir,
+       '--new-datadir' => $newnode->data_dir,
+       '--old-bindir' => $oldnode->config_data('--bindir'),
+       '--new-bindir' => $newnode->config_data('--bindir'),
+       '--socketdir' => $newnode->host,
+       '--old-port' => $oldnode->port,
+       '--new-port' => $newnode->port,
+   );
+   command_ok(\@pg_upgrade_cmd, 'run of pg_upgrade for new instance');
+
+   # The range of multixids to inspect comes from the old cluster's
+   # control file (oldest and next multixid; the offset is not needed).
+   my ($first_mxid, $next_mxid) = read_multixid_fields($oldnode);
+
+   # Start a cluster, dump the contents of the test table and the
+   # status of all updating multixids into files, and stop it again.
+   # (Locking-only multixids don't need to be preserved so they are
+   # ignored.)  Returns the paths of the two dump files.
+   my $dump_cluster = sub {
+       my ($node, $prefix) = @_;
+
+       $node->start;
+       my $table_contents =
+         get_test_table_contents($node, "${prefix}_${tag}_table_contents");
+       my $multixacts =
+         get_updating_multixact_members($node, $first_mxid, $next_mxid,
+           "${prefix}_${tag}_multixacts");
+       $node->stop;
+
+       return ($table_contents, $multixacts);
+   };
+
+   # Note: the old cluster is dumped only now, *after* pg_upgrade has
+   # run, so that the SELECT on the table cannot set all the hint bits
+   # before the upgrade.
+   my ($old_table_contents, $old_multixacts) =
+     $dump_cluster->($oldnode, 'oldnode');
+
+   # Same dump from the upgraded cluster, then compare the two.
+   my ($new_table_contents, $new_multixacts) =
+     $dump_cluster->($newnode, 'newnode');
+
+   compare_files($old_table_contents, $new_table_contents,
+       'test table contents from original and upgraded clusters match');
+   compare_files($old_multixacts, $new_multixacts,
+       'multixact members from original and upgraded clusters match');
+}
+
+my $old_version;
+
+# Basic scenario: Create a cluster using old installation, run
+# multixid-creating workload on it, then upgrade.
+#
+# This works even if the old and new versions are the same,
+# although it's not very interesting as the conversion routines only
+# run when upgrading from a pre-v19 cluster.
+{
+   my $tag = 'basic';
+
+   # The old node runs the binaries from $ENV{oldinstall}; the new node
+   # uses the default installation.
+   my $oldnode = PostgreSQL::Test::Cluster->new(
+       "${tag}_oldnode",
+       install_path => $ENV{oldinstall});
+   my $newnode = PostgreSQL::Test::Cluster->new("${tag}_newnode");
+
+   $oldnode->init(extra => ['-k']);
+
+   # Record the old installation's version in the file-level variable,
+   # so that it remains available after this block.
+   $old_version = $oldnode->pg_version;
+   note "old installation is version $old_version\n";
+
+   # Run the workload, capturing (oldest mxid, next mxid, next offset)
+   # from the control file before and after it.
+   my @before = read_multixid_fields($oldnode);
+   mxact_workload($oldnode, $newnode);
+   my @after = read_multixid_fields($oldnode);
+
+   note join("\n",
+       "Testing upgrade, ${tag} scenario",
+       " mxid from $before[1] to $after[1]",
+       " oldnode mxoff from $before[2] to $after[2]",
+       "");
+
+   $newnode->init;
+   upgrade_and_compare($tag, $oldnode, $newnode);
+}
+
+# Wraparound scenario: This is the same as the basic scenario, but the
+# old cluster goes through multixid and offset wraparound.
+#
+# This requires the old installation to be version 18 or older,
+# because the hacks we use to reset the old cluster to a state just
+# before the wraparound rely on the pre-v19 file format.  If the old
+# cluster is of v19 or above, multixact SLRU conversion is not needed
+# anyway.
+SKIP:
+{
+   # Nothing to convert when the old cluster is already v19 or newer.
+   skip(
+       "skipping mxoffset conversion tests because upgrading from the old version does not require conversion"
+   ) if $old_version >= '19devel';
+
+   my $tag = 'wraparound';
+
+   # The old node runs the binaries from $ENV{oldinstall}; the new node
+   # uses the default installation.
+   my $oldnode = PostgreSQL::Test::Cluster->new(
+       "${tag}_oldnode",
+       install_path => $ENV{oldinstall});
+   my $newnode = PostgreSQL::Test::Cluster->new("${tag}_newnode");
+
+   $oldnode->init(extra => ['-k']);
+
+   # Reset the old cluster to just before multixid and 32-bit offset
+   # wraparound.
+   reset_mxid_mxoffset_pre_v19($oldnode, 0xFFFFFA00, 0xFFFFEC00);
+
+   # Run the workload, which crosses multixid and offset wraparound.
+   # Capture (oldest mxid, next mxid, next offset) before and after.
+   my @before = read_multixid_fields($oldnode);
+   mxact_workload($oldnode, $newnode);
+   my @after = read_multixid_fields($oldnode);
+
+   note join("\n",
+       "Testing upgrade, ${tag} scenario",
+       " mxid from $before[1] to $after[1]",
+       " oldnode mxoff from $before[2] to $after[2]",
+       "");
+
+   # Verify that wraparounds happened: both counters must have ended
+   # up numerically below where they started.
+   cmp_ok($after[1], '<', $before[1],
+       "multixid wrapped around in old cluster");
+   cmp_ok($after[2], '<', $before[2],
+       "mxoff wrapped around in old cluster");
+
+   $newnode->init;
+   upgrade_and_compare($tag, $oldnode, $newnode);
+}
+
+done_testing();
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h

index 82e4bb90dd581bd67a10f61b93536d12e25ea276..6433fe163641ea1d9e434af86f6753e4811afefa 100644 (file)
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -28,8 +28,6 @@
  
  #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
  
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
-
  /*
   * Possible multixact lock modes ("status").  The first four modes are for
   * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
@@ -111,7 +109,7 @@ extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
  extern void MultiXactIdSetOldestMember(void);
  extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
                                   bool from_pgupgrade, bool isLockOnly);
-extern bool GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
+extern void GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
                              MultiXactId *oldestMultiXactId,
                              MultiXactOffset *oldestOffset);
  extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
@@ -131,8 +129,7 @@ extern void BootStrapMultiXact(void);
  extern void StartupMultiXact(void);
  extern void TrimMultiXact(void);
  extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid,
-                               Oid oldest_datoid,
-                               bool is_startup);
+                               Oid oldest_datoid);
  extern void MultiXactGetCheckptMulti(bool is_shutdown,
                                      MultiXactId *nextMulti,
                                      MultiXactOffset *nextMultiOffset,
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h

index f711f0a81ebe609103370886e035d3594bab50ad..f2d6539e8a67245d9d5e35eb1dae70614600d9d6 100644 (file)
--- a/src/include/access/multixact_internal.h
+++ b/src/include/access/multixact_internal.h
@@ -13,6 +13,11 @@
   * src/include/access/multixact_internal.h
   */
  #ifndef MULTIXACT_INTERNAL_H
+
+/*
+ * Note: This is not only to prevent including this file twice.
+ * MULTIXACT_INTERNAL_H is checked explicitly in multixact_read_v18.c.
+ */
  #define MULTIXACT_INTERNAL_H
  
  #include "access/multixact.h"
@@ -21,17 +26,9 @@
  /*
   * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
   * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
   */
  
-/* We need four bytes per offset */
+/* We need 8 bytes per offset */
  #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
  
  static inline int64
@@ -80,19 +77,6 @@ MultiXactIdToOffsetSegment(MultiXactId multi)
  #define MULTIXACT_MEMBERS_PER_PAGE \
     (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
  
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages.  (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
- */
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
-       ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
  /* page in which a member is to be found */
  static inline int64
  MXOffsetToMemberPage(MultiXactOffset offset)
diff --git a/src/include/c.h b/src/include/c.h

index ccd2b654d45971898aa634a62792426532644160..62cbf7a2eec20df1e8ce6064c6d675fce3e45154 100644 (file)
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -669,7 +669,7 @@ typedef uint32 SubTransactionId;
  /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
  typedef TransactionId MultiXactId;
  
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
  
  typedef uint32 CommandId;
  
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 2fa6c8c60f0f5d1df8a1ccc123a5f55929aac970..82dc84e4099dc147861265ca4890edc0a6e10688 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202512061
+#define CATALOG_VERSION_NO 202512091
  
  #endif
diff --git a/src/test/modules/test_slru/t/002_multixact_wraparound.pl b/src/test/modules/test_slru/t/002_multixact_wraparound.pl

index 169333fc564783da1aa49a1005adde6608e27a8a..272d8e6fb08769f59a7960f53e48920a94995294 100644 (file)
--- a/src/test/modules/test_slru/t/002_multixact_wraparound.pl
+++ b/src/test/modules/test_slru/t/002_multixact_wraparound.pl
@@ -37,7 +37,7 @@ my $slru_pages_per_segment = $1;
  
  # initialize the 'offsets' SLRU file containing the new next multixid
  # with zeros
-my $multixact_offsets_per_page = $blcksz / 4;   # sizeof(MultiXactOffset) == 4
+my $multixact_offsets_per_page = $blcksz / 8;   # sizeof(MultiXactOffset) == 8
  my $segno =
    int(0xFFFFFFF8 / $multixact_offsets_per_page / $slru_pages_per_segment);
  my $slru_file = sprintf('%s/pg_multixact/offsets/%04X', $node_pgdata, $segno);
diff --git a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm

index 60bbd5dd445b3070db11a4bad5d6d2be4acb3f34..9825aaa9bb426cdcf427d5ab7db2c8a84447f894 100644 (file)
--- a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm
+++ b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm
@@ -230,18 +230,23 @@ Executes a query in the current session and returns the output in scalar
  context and (output, error) in list context where error is 1 in case there
  was output generated on stderr when executing the query.
  
+By default, the query and its results are printed to the test output. This
+can be disabled by passing a false value for the keyword parameter, e.g. verbose => 0.
+
  =cut
  
  sub query
  {
-   my ($self, $query) = @_;
+   my ($self, $query, %params) = @_;
     my $ret;
     my $output;
     my $query_cnt = $self->{query_cnt}++;
  
+   $params{verbose} = 1 unless defined $params{verbose};
+
     local $Test::Builder::Level = $Test::Builder::Level + 1;
  
-   note "issuing query $query_cnt via background psql: $query";
+   note "issuing query $query_cnt via background psql: $query" unless !$params{verbose};
  
     $self->{timeout}->start() if (defined($self->{query_timer_restart}));
  
@@ -280,7 +285,7 @@ sub query
       explain {
         stdout => $self->{stdout},
         stderr => $self->{stderr},
-     };
+     } unless !$params{verbose};
  
     # Remove banner from stdout and stderr, our caller doesn't care.  The
     # first newline is optional, as there would not be one if consuming an
@@ -308,9 +313,9 @@ Query failure is determined by it producing output on stderr.
  
  sub query_safe
  {
-   my ($self, $query) = @_;
+   my ($self, $query, %params) = @_;
  
-   my $ret = $self->query($query);
+   my $ret = $self->query($query, %params);
  
     if ($self->{stderr} ne "")
     {
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm

index 747528c4af1fcfc6b119529dab5f179811f2a8c1..295988b8b877195adab35f0c750e64986f019ec4 100644 (file)
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -1793,13 +1793,20 @@ sub _get_env
     return (%inst_env);
  }
  
-# Private routine to get an installation path qualified command.
-#
-# IPC::Run maintains a cache, %cmd_cache, mapping commands to paths.  Tests
-# which use nodes spanning more than one postgres installation path need to
-# avoid confusing which installation's binaries get run.  Setting $ENV{PATH} is
-# insufficient, as IPC::Run does not check to see if the path has changed since
-# caching a command.
+=pod
+
+=item $node->installed_command(cmd)
+
+Get an installation path qualified command.
+
+IPC::Run maintains a cache, %cmd_cache, mapping commands to paths.  Tests
+which use nodes spanning more than one postgres installation path need to
+avoid confusing which installation's binaries get run.  Setting $ENV{PATH} is
+insufficient, as IPC::Run does not check to see if the path has changed since
+caching a command.
+
+=cut
+
  sub installed_command
  {
     my ($self, $cmd) = @_;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index 6e2ed0c8825a9ed2b3904770b6e2016d0deb285e..9dd65b102544e9c7de89839359936ee589b0936a 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1731,6 +1731,7 @@ MultiXactMember
  MultiXactOffset
  MultiXactStateData
  MultiXactStatus
+MultiXactWriter
  MultirangeIOData
  MultirangeParseState
  MultirangeType
@@ -1816,6 +1817,7 @@ OffsetVarNodes_context
  Oid
  OidOptions
  OkeysState
+OldMultiXactReader
  OldToNewMapping
  OldToNewMappingData
  OnCommitAction
@@ -2814,6 +2816,7 @@ SlruCtlData
  SlruErrorCause
  SlruPageStatus
  SlruScanCallback
+SlruSegState
  SlruShared
  SlruSharedData
  SlruWriteAll
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 9 Dec 2025 11:53:03 +0000 (13:53 +0200)
doc/src/sgml/ref/pg_resetwal.sgml		patch \| blob \| blame \| history
src/backend/access/rmgrdesc/mxactdesc.c		patch \| blob \| blame \| history
src/backend/access/rmgrdesc/xlogdesc.c		patch \| blob \| blame \| history
src/backend/access/transam/multixact.c		patch \| blob \| blame \| history
src/backend/access/transam/xlog.c		patch \| blob \| blame \| history
src/backend/access/transam/xlogrecovery.c		patch \| blob \| blame \| history
src/backend/commands/vacuum.c		patch \| blob \| blame \| history
src/backend/postmaster/autovacuum.c		patch \| blob \| blame \| history
src/bin/pg_controldata/pg_controldata.c		patch \| blob \| blame \| history
src/bin/pg_resetwal/pg_resetwal.c		patch \| blob \| blame \| history
src/bin/pg_resetwal/t/001_basic.pl		patch \| blob \| blame \| history
src/bin/pg_upgrade/Makefile		patch \| blob \| blame \| history
src/bin/pg_upgrade/meson.build		patch \| blob \| blame \| history
src/bin/pg_upgrade/multixact_read_v18.c	[new file with mode: 0644]	patch \| blob
src/bin/pg_upgrade/multixact_read_v18.h	[new file with mode: 0644]	patch \| blob
src/bin/pg_upgrade/multixact_rewrite.c	[new file with mode: 0644]	patch \| blob
src/bin/pg_upgrade/pg_upgrade.c		patch \| blob \| blame \| history
src/bin/pg_upgrade/pg_upgrade.h		patch \| blob \| blame \| history
src/bin/pg_upgrade/slru_io.c	[new file with mode: 0644]	patch \| blob
src/bin/pg_upgrade/slru_io.h	[new file with mode: 0644]	patch \| blob
src/bin/pg_upgrade/t/007_multixact_conversion.pl	[new file with mode: 0644]	patch \| blob
src/include/access/multixact.h		patch \| blob \| blame \| history
src/include/access/multixact_internal.h		patch \| blob \| blame \| history
src/include/c.h		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/test/modules/test_slru/t/002_multixact_wraparound.pl		patch \| blob \| blame \| history
src/test/perl/PostgreSQL/Test/BackgroundPsql.pm		patch \| blob \| blame \| history
src/test/perl/PostgreSQL/Test/Cluster.pm		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history