* fill WAL segments; the checkpointer itself doesn't watch for the
  * condition.)
  *
- * Normal termination is by SIGUSR2, which instructs the checkpointer to
- * execute a shutdown checkpoint and then exit(0).  (All backends must be
- * stopped before SIGUSR2 is issued!)  Emergency termination is by SIGQUIT;
- * like any backend, the checkpointer will simply abort and exit on SIGQUIT.
+ * The normal termination sequence is that checkpointer is instructed to
+ * execute the shutdown checkpoint by SIGINT.  After that checkpointer waits
+ * to be terminated via SIGUSR2, which instructs the checkpointer to exit(0).
+ * All backends must be stopped before SIGINT or SIGUSR2 is issued!
+ *
+ * Emergency termination is by SIGQUIT; like any backend, the checkpointer
+ * will simply abort and exit on SIGQUIT.
  *
  * If the checkpointer exits unexpectedly, the postmaster treats that the same
  * as a backend crash: shared memory may be corrupted, so remaining backends
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
+#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/procsignal.h"
 #include "storage/shmem.h"
  * Private state
  */
 static bool ckpt_active = false;
+static volatile sig_atomic_t ShutdownXLOGPending = false;
 
 /* these values are valid when ckpt_active is true: */
 static pg_time_t ckpt_start_time;
 static bool CompactCheckpointerRequestQueue(void);
 static void UpdateSharedMemoryConfig(void);
 
+/* Signal handlers */
+static void ReqShutdownXLOG(SIGNAL_ARGS);
+
 
 /*
  * Main entry point for checkpointer process
     * tell us it's okay to shut down (via SIGUSR2).
     */
    pqsignal(SIGHUP, SignalHandlerForConfigReload);
-   pqsignal(SIGINT, SIG_IGN);
+   pqsignal(SIGINT, ReqShutdownXLOG);
    pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
    /* SIGQUIT handler was already set up by InitPostmasterChild */
    pqsignal(SIGALRM, SIG_IGN);
     * process during a normal shutdown, and since checkpointer is shut down
     * very late...
     *
-    * Walsenders are shut down after the checkpointer, but currently don't
-    * report stats. If that changes, we need a more complicated solution.
+    * While e.g. walsenders are active after the shutdown checkpoint has been
+    * written (and thus could produce more stats), checkpointer stays around
+    * after the shutdown checkpoint has been written. postmaster will only
+    * signal checkpointer to exit after all processes that could emit stats
+    * have been shut down.
     */
    before_shmem_exit(pgstat_before_server_shutdown, 0);
 
    ProcGlobal->checkpointerProc = MyProcNumber;
 
    /*
-    * Loop forever
+    * Loop until we've been asked to write the shutdown checkpoint or
+    * terminate.
     */
    for (;;)
    {
         * Process any requests or signals received recently.
         */
        AbsorbSyncRequests();
+
        HandleCheckpointerInterrupts();
+       if (ShutdownXLOGPending || ShutdownRequestPending)
+           break;
 
        /*
         * Detect a pending checkpoint request by checking whether the flags
 
            ckpt_active = false;
 
-           /* We may have received an interrupt during the checkpoint. */
+           /*
+            * We may have received an interrupt during the checkpoint and the
+            * latch might have been reset (e.g. in CheckpointWriteDelay).
+            */
            HandleCheckpointerInterrupts();
+           if (ShutdownXLOGPending || ShutdownRequestPending)
+               break;
        }
 
        /* Check for archive_timeout and switch xlog files if necessary. */
                         cur_timeout * 1000L /* convert to ms */ ,
                         WAIT_EVENT_CHECKPOINTER_MAIN);
    }
+
+   /*
+    * From here on, elog(ERROR) should end with exit(1), not send control
+    * back to the sigsetjmp block above.
+    */
+   ExitOnAnyError = true;
+
+   if (ShutdownXLOGPending)
+   {
+       /*
+        * Close down the database.
+        *
+        * Since ShutdownXLOG() creates a restartpoint or checkpoint and
+        * updates the statistics, increment the checkpoint request counter
+        * and flush out pending statistics.
+        */
+       PendingCheckpointerStats.num_requested++;
+       ShutdownXLOG(0, 0);
+       pgstat_report_checkpointer();
+       pgstat_report_wal(true);
+
+       /*
+        * Tell postmaster that we're done.
+        */
+       SendPostmasterSignal(PMSIGNAL_XLOG_IS_SHUTDOWN);
+       ShutdownXLOGPending = false;
+   }
+
+   /*
+    * Wait until we're asked to shut down. By separating the writing of the
+    * shutdown checkpoint from checkpointer exiting, checkpointer can perform
+    * some should-be-as-late-as-possible work like writing out stats.
+    */
+   for (;;)
+   {
+       /* Clear any already-pending wakeups */
+       ResetLatch(MyLatch);
+
+       HandleCheckpointerInterrupts();
+
+       if (ShutdownRequestPending)
+           break;
+
+       (void) WaitLatch(MyLatch,
+                        WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
+                        0,
+                        WAIT_EVENT_CHECKPOINTER_SHUTDOWN);
+   }
+
+   /* Normal exit from the checkpointer is here */
+   proc_exit(0);               /* done */
 }
 
 /*
         */
        UpdateSharedMemoryConfig();
    }
-   if (ShutdownRequestPending)
-   {
-       /*
-        * From here on, elog(ERROR) should end with exit(1), not send control
-        * back to the sigsetjmp block above
-        */
-       ExitOnAnyError = true;
-
-       /*
-        * Close down the database.
-        *
-        * Since ShutdownXLOG() creates restartpoint or checkpoint, and
-        * updates the statistics, increment the checkpoint request and flush
-        * out pending statistic.
-        */
-       PendingCheckpointerStats.num_requested++;
-       ShutdownXLOG(0, 0);
-       pgstat_report_checkpointer();
-       pgstat_report_wal(true);
-
-       /* Normal exit from the checkpointer is here */
-       proc_exit(0);           /* done */
-   }
 
    /* Perform logging of memory contexts of this process */
    if (LogMemoryContextPending)
     * in which case we just try to catch up as quickly as possible.
     */
    if (!(flags & CHECKPOINT_IMMEDIATE) &&
+       !ShutdownXLOGPending &&
        !ShutdownRequestPending &&
        !ImmediateCheckpointRequested() &&
        IsCheckpointOnSchedule(progress))
 }
 
 
+/* --------------------------------
+ *     signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * SIGINT: set flag to trigger writing of shutdown checkpoint.
+ *
+ * Only records the request in ShutdownXLOGPending and sets MyLatch; the
+ * checkpointer's main loop tests the flag and, once it is set, leaves the
+ * loop, calls ShutdownXLOG() and reports completion to the postmaster with
+ * PMSIGNAL_XLOG_IS_SHUTDOWN.
+ */
+static void
+ReqShutdownXLOG(SIGNAL_ARGS)
+{
+   ShutdownXLOGPending = true;
+   SetLatch(MyLatch);
+}
+
+
 /* --------------------------------
  *     communication with backends
  * --------------------------------
 
                                 * ckpt */
    PM_WAIT_XLOG_ARCHIVAL,      /* waiting for archiver and walsenders to
                                 * finish */
+   PM_WAIT_CHECKPOINTER,       /* waiting for checkpointer to shut down */
    PM_WAIT_DEAD_END,           /* waiting for dead-end children to exit */
    PM_NO_CHILDREN,             /* all important children have exited */
 } PMState;
        {
            ReleasePostmasterChildSlot(CheckpointerPMChild);
            CheckpointerPMChild = NULL;
-           if (EXIT_STATUS_0(exitstatus) && pmState == PM_WAIT_XLOG_SHUTDOWN)
+           if (EXIT_STATUS_0(exitstatus) && pmState == PM_WAIT_CHECKPOINTER)
            {
                /*
                 * OK, we saw normal exit of the checkpointer after it's been
-                * told to shut down.  We expect that it wrote a shutdown
-                * checkpoint.  (If for some reason it didn't, recovery will
-                * occur on next postmaster start.)
+                * told to shut down.  We know checkpointer wrote a shutdown
+                * checkpoint, otherwise we'd still be in
+                * PM_WAIT_XLOG_SHUTDOWN state.
                 *
-                * At this point we should have no normal backend children
-                * left (else we'd not be in PM_WAIT_XLOG_SHUTDOWN state) but
-                * we might have dead-end children to wait for.
-                *
-                * If we have an archiver subprocess, tell it to do a last
-                * archive cycle and quit. Likewise, if we have walsender
-                * processes, tell them to send any remaining WAL and quit.
+                * At this point only dead-end children and logger should be
+                * left.
                 */
-               Assert(Shutdown > NoShutdown);
-
-               /* Waken archiver for the last time */
-               if (PgArchPMChild != NULL)
-                   signal_child(PgArchPMChild, SIGUSR2);
-
-               /*
-                * Waken walsenders for the last time. No regular backends
-                * should be around anymore.
-                */
-               SignalChildren(SIGUSR2, btmask(B_WAL_SENDER));
-
-               UpdatePMState(PM_WAIT_XLOG_ARCHIVAL);
+               UpdatePMState(PM_WAIT_DEAD_END);
+               ConfigurePostmasterWaitSet(false);
+               SignalChildren(SIGTERM, btmask_all_except(B_LOGGER));
            }
            else
            {
 
        case PM_WAIT_XLOG_SHUTDOWN:
        case PM_WAIT_XLOG_ARCHIVAL:
+       case PM_WAIT_CHECKPOINTER:
 
            /*
             * NB: Similar code exists in PostmasterStateMachine()'s handling
                /* Start the checkpointer if not running */
                if (CheckpointerPMChild == NULL)
                    CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
-               /* And tell it to shut down */
+               /* And tell it to write the shutdown checkpoint */
                if (CheckpointerPMChild != NULL)
                {
-                   signal_child(CheckpointerPMChild, SIGUSR2);
+                   signal_child(CheckpointerPMChild, SIGINT);
                    UpdatePMState(PM_WAIT_XLOG_SHUTDOWN);
                }
                else
        }
    }
 
+   /*
+    * The state transition from PM_WAIT_XLOG_SHUTDOWN to
+    * PM_WAIT_XLOG_ARCHIVAL is in process_pm_pmsignal(), in response to
+    * PMSIGNAL_XLOG_IS_SHUTDOWN.
+    */
+
    if (pmState == PM_WAIT_XLOG_ARCHIVAL)
    {
        /*
-        * PM_WAIT_XLOG_ARCHIVAL state ends when there's no other children
-        * than dead-end children left. There shouldn't be any regular
-        * backends left by now anyway; what we're really waiting for is
-        * walsenders and archiver.
+        * PM_WAIT_XLOG_ARCHIVAL state ends when there are no children other
+        * than checkpointer, dead-end children and logger left. There
+        * shouldn't be any regular backends left by now anyway; what we're
+        * really waiting for is for walsenders and archiver to exit.
         */
-       if (CountChildren(btmask_all_except(B_LOGGER, B_DEAD_END_BACKEND)) == 0)
+       if (CountChildren(btmask_all_except(B_CHECKPOINTER, B_LOGGER, B_DEAD_END_BACKEND)) == 0)
        {
-           UpdatePMState(PM_WAIT_DEAD_END);
-           ConfigurePostmasterWaitSet(false);
-           SignalChildren(SIGTERM, btmask_all_except(B_LOGGER));
+           UpdatePMState(PM_WAIT_CHECKPOINTER);
+
+           /*
+            * Now that the processes mentioned above are gone, tell
+            * checkpointer to shut down too. That allows checkpointer to
+            * perform some last bits of cleanup without other processes
+            * interfering.
+            */
+           if (CheckpointerPMChild != NULL)
+               signal_child(CheckpointerPMChild, SIGUSR2);
        }
    }
 
+   /*
+    * The state transition from PM_WAIT_CHECKPOINTER to PM_WAIT_DEAD_END is
+    * in process_pm_child_exit().
+    */
+
    if (pmState == PM_WAIT_DEAD_END)
    {
        /*
            PM_TOSTR_CASE(PM_WAIT_XLOG_SHUTDOWN);
            PM_TOSTR_CASE(PM_WAIT_XLOG_ARCHIVAL);
            PM_TOSTR_CASE(PM_WAIT_DEAD_END);
+           PM_TOSTR_CASE(PM_WAIT_CHECKPOINTER);
            PM_TOSTR_CASE(PM_NO_CHILDREN);
    }
 #undef PM_TOSTR_CASE
 static void
 process_pm_pmsignal(void)
 {
+   bool        request_state_update = false;
+
    pending_pm_pmsignal = false;
 
    ereport(DEBUG2,
        WalReceiverRequested = true;
    }
 
+   if (CheckPostmasterSignal(PMSIGNAL_XLOG_IS_SHUTDOWN))
+   {
+       /* Checkpointer completed the shutdown checkpoint */
+       if (pmState == PM_WAIT_XLOG_SHUTDOWN)
+       {
+           /*
+            * If we have an archiver subprocess, tell it to do a last archive
+            * cycle and quit. Likewise, if we have walsender processes, tell
+            * them to send any remaining WAL and quit.
+            */
+           Assert(Shutdown > NoShutdown);
+
+           /* Waken archiver for the last time */
+           if (PgArchPMChild != NULL)
+               signal_child(PgArchPMChild, SIGUSR2);
+
+           /*
+            * Waken walsenders for the last time. No regular backends should
+            * be around anymore.
+            */
+           SignalChildren(SIGUSR2, btmask(B_WAL_SENDER));
+
+           UpdatePMState(PM_WAIT_XLOG_ARCHIVAL);
+       }
+       else if (!FatalError && Shutdown != ImmediateShutdown)
+       {
+           /*
+            * Checkpointer only ought to perform the shutdown checkpoint
+            * during shutdown.  If somehow checkpointer did so in another
+            * situation, we have no choice but to crash-restart.
+            *
+            * It's possible however that we get PMSIGNAL_XLOG_IS_SHUTDOWN
+            * outside of PM_WAIT_XLOG_SHUTDOWN if an orderly shutdown was
+            * "interrupted" by a crash or an immediate shutdown.
+            */
+           ereport(LOG,
+                   (errmsg("WAL was shut down unexpectedly")));
+
+           /*
+            * Doesn't seem likely to help to take send_abort_for_crash into
+            * account here.
+            */
+           HandleFatalError(PMQUIT_FOR_CRASH, false);
+       }
+
+       /*
+        * Need to run PostmasterStateMachine() to check if we already can go
+        * to the next state.
+        */
+       request_state_update = true;
+   }
+
    /*
     * Try to advance postmaster's state machine, if a child requests it.
-    *
+    */
+   if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
+   {
+       request_state_update = true;
+   }
+
+   /*
     * Be careful about the order of this action relative to this function's
     * other actions.  Generally, this should be after other actions, in case
     * they have effects PostmasterStateMachine would need to know about.
     * cannot have any (immediate) effect on the state machine, but does
     * depend on what state we're in now.
     */
-   if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
+   if (request_state_update)
    {
        PostmasterStateMachine();
    }
    switch (pmState)
    {
        case PM_NO_CHILDREN:
+       case PM_WAIT_CHECKPOINTER:
        case PM_WAIT_DEAD_END:
        case PM_WAIT_XLOG_ARCHIVAL:
        case PM_WAIT_XLOG_SHUTDOWN:
 
 BGWRITER_HIBERNATE "Waiting in background writer process, hibernating."
 BGWRITER_MAIN  "Waiting in main loop of background writer process."
 CHECKPOINTER_MAIN  "Waiting in main loop of checkpointer process."
+CHECKPOINTER_SHUTDOWN  "Waiting for checkpointer process to be terminated."
 LOGICAL_APPLY_MAIN "Waiting in main loop of logical replication apply process."
 LOGICAL_LAUNCHER_MAIN  "Waiting in main loop of logical replication launcher process."
 LOGICAL_PARALLEL_APPLY_MAIN    "Waiting in main loop of logical replication parallel apply process."
 
    PMSIGNAL_BACKGROUND_WORKER_CHANGE,  /* background worker state change */
    PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
    PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
+   PMSIGNAL_XLOG_IS_SHUTDOWN,  /* ShutdownXLOG() completed */
 } PMSignalReason;
 
-#define NUM_PMSIGNALS (PMSIGNAL_ADVANCE_STATE_MACHINE+1)
+#define NUM_PMSIGNALS (PMSIGNAL_XLOG_IS_SHUTDOWN+1)
 
 /*
  * Reasons why the postmaster would send SIGQUIT to its children.