Revert refactoring of restore command code to shell_restore.c
authorMichael Paquier <michael@paquier.xyz>
Sun, 5 Feb 2023 23:28:42 +0000 (08:28 +0900)
committerMichael Paquier <michael@paquier.xyz>
Sun, 5 Feb 2023 23:28:42 +0000 (08:28 +0900)
This reverts commits 24c35ec and 57169ad.  PreRestoreCommand() and
PostRestoreCommand() need to be put closer to the system() call calling
a restore_command, as they enable in_restore_command for the startup
process which would in turn trigger an immediate proc_exit() in the
SIGTERM handler.  Perhaps we could get rid of this behavior entirely,
but 24c35ec has made the window where the flag is enabled much larger
than it was, and any Postgres-like actions (palloc, etc.) taken by code
paths while the flag is enabled could lead to more severe issues in the
shutdown processing.

Note that curculio has showed that there are much more problems in this
area, unrelated to this change, actually, hence the issues related to
that had better be addressed first.  Keeping the code of HEAD in line
with the stable branches should make that a bit easier.

Per discussion with Andres Freund and Nathan Bossart.

Discussion: https://postgr.es/m/Y979NR3U5VnWrTwB@paquier.xyz

12 files changed:
src/backend/access/transam/Makefile
src/backend/access/transam/meson.build
src/backend/access/transam/shell_restore.c [deleted file]
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogarchive.c
src/common/Makefile
src/common/archive.c [new file with mode: 0644]
src/common/meson.build
src/fe_utils/archive.c
src/include/access/xlogarchive.h
src/include/common/archive.h [new file with mode: 0644]
src/tools/msvc/Mkvcbuild.pm

index 099c315d0327b8a539fb273f940a9c86c16aa7ec..661c55a9db789760b8ec95e22b1d02922d3a2671 100644 (file)
@@ -19,7 +19,6 @@ OBJS = \
        multixact.o \
        parallel.o \
        rmgr.o \
-       shell_restore.o \
        slru.o \
        subtrans.o \
        timeline.o \
index 3031c2f6cfbbaefdf08f068d523cb8d78d011759..8920c1bfce21fbeaa09ab3b70b00cc9219a53070 100644 (file)
@@ -7,7 +7,6 @@ backend_sources += files(
   'multixact.c',
   'parallel.c',
   'rmgr.c',
-  'shell_restore.c',
   'slru.c',
   'subtrans.c',
   'timeline.c',
diff --git a/src/backend/access/transam/shell_restore.c b/src/backend/access/transam/shell_restore.c
deleted file mode 100644 (file)
index 8458209..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * shell_restore.c
- *             Recovery functions for a user-specified shell command.
- *
- * These recovery functions use a user-specified shell command (e.g. based
- * on the GUC restore_command).
- *
- * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/backend/access/transam/shell_restore.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-
-#include <signal.h>
-
-#include "access/xlogarchive.h"
-#include "access/xlogrecovery.h"
-#include "common/percentrepl.h"
-#include "storage/ipc.h"
-#include "utils/wait_event.h"
-
-static bool ExecuteRecoveryCommand(const char *command,
-                                                                  const char *commandName,
-                                                                  bool failOnSignal,
-                                                                  bool exitOnSigterm,
-                                                                  uint32 wait_event_info,
-                                                                  int fail_elevel);
-
-/*
- * Attempt to execute a shell-based restore command.
- *
- * Returns true if the command has succeeded, false otherwise.
- */
-bool
-shell_restore(const char *file, const char *path,
-                         const char *lastRestartPointFileName)
-{
-       char       *nativePath = pstrdup(path);
-       char       *cmd;
-       bool            ret;
-
-       /* Build the restore command to execute */
-       make_native_path(nativePath);
-       cmd = replace_percent_placeholders(recoveryRestoreCommand,
-                                                                          "restore_command", "frp", file,
-                                                                          lastRestartPointFileName,
-                                                                          nativePath);
-       pfree(nativePath);
-
-       /*
-        * Remember, we rollforward UNTIL the restore fails so failure here is
-        * just part of the process... that makes it difficult to determine
-        * whether the restore failed because there isn't an archive to restore,
-        * or because the administrator has specified the restore program
-        * incorrectly.  We have to assume the former.
-        *
-        * However, if the failure was due to any sort of signal, it's best to
-        * punt and abort recovery.  (If we "return false" here, upper levels will
-        * assume that recovery is complete and start up the database!) It's
-        * essential to abort on child SIGINT and SIGQUIT, because per spec
-        * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-        * those it's a good bet we should have gotten it too.
-        *
-        * On SIGTERM, assume we have received a fast shutdown request, and exit
-        * cleanly. It's pure chance whether we receive the SIGTERM first, or the
-        * child process. If we receive it first, the signal handler will call
-        * proc_exit, otherwise we do it here. If we or the child process received
-        * SIGTERM for any other reason than a fast shutdown request, postmaster
-        * will perform an immediate shutdown when it sees us exiting
-        * unexpectedly.
-        *
-        * We treat hard shell errors such as "command not found" as fatal, too.
-        */
-       ret = ExecuteRecoveryCommand(cmd, "restore_command",
-                                                                true,  /* failOnSignal */
-                                                                true,  /* exitOnSigterm */
-                                                                WAIT_EVENT_RESTORE_COMMAND, DEBUG2);
-       pfree(cmd);
-
-       return ret;
-}
-
-/*
- * Attempt to execute a shell-based archive cleanup command.
- */
-void
-shell_archive_cleanup(const char *lastRestartPointFileName)
-{
-       char       *cmd;
-
-       cmd = replace_percent_placeholders(archiveCleanupCommand,
-                                                                          "archive_cleanup_command",
-                                                                          "r", lastRestartPointFileName);
-       (void) ExecuteRecoveryCommand(cmd, "archive_cleanup_command", false, false,
-                                                                 WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND, WARNING);
-       pfree(cmd);
-}
-
-/*
- * Attempt to execute a shell-based end-of-recovery command.
- */
-void
-shell_recovery_end(const char *lastRestartPointFileName)
-{
-       char       *cmd;
-
-       cmd = replace_percent_placeholders(recoveryEndCommand,
-                                                                          "recovery_end_command",
-                                                                          "r", lastRestartPointFileName);
-       (void) ExecuteRecoveryCommand(cmd, "recovery_end_command", true, false,
-                                                                 WAIT_EVENT_RECOVERY_END_COMMAND, WARNING);
-       pfree(cmd);
-}
-
-/*
- * Attempt to execute an external shell command during recovery.
- *
- * 'command' is the shell command to be executed, 'commandName' is a
- * human-readable name describing the command emitted in the logs. If
- * 'failOnSignal' is true and the command is killed by a signal, a FATAL
- * error is thrown. Otherwise, 'fail_elevel' is used for the log message.
- * If 'exitOnSigterm' is true and the command is killed by SIGTERM, we exit
- * immediately.
- *
- * Returns whether the command succeeded.
- */
-static bool
-ExecuteRecoveryCommand(const char *command, const char *commandName,
-                                          bool failOnSignal, bool exitOnSigterm,
-                                          uint32 wait_event_info, int fail_elevel)
-{
-       int                     rc;
-
-       Assert(command && commandName);
-
-       ereport(DEBUG3,
-                       (errmsg_internal("executing %s \"%s\"", commandName, command)));
-
-       /*
-        * execute the constructed command
-        */
-       fflush(NULL);
-       pgstat_report_wait_start(wait_event_info);
-       rc = system(command);
-       pgstat_report_wait_end();
-
-       if (rc != 0)
-       {
-               if (exitOnSigterm && wait_result_is_signal(rc, SIGTERM))
-                       proc_exit(1);
-
-               /*
-                * If the failure was due to any sort of signal, it's best to punt and
-                * abort recovery.  See comments in shell_restore().
-                */
-               ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : fail_elevel,
-               /*------
-                  translator: First %s represents a postgresql.conf parameter name like
-                 "recovery_end_command", the 2nd is the value of that parameter, the
-                 third an already translated error message. */
-                               (errmsg("%s \"%s\": %s", commandName,
-                                               command, wait_result_to_str(rc))));
-       }
-
-       return (rc == 0);
-}
index fb4c860bdea035715a2ce2c69007533b951fa307..f9f0f6db8d1d1b500e0d6b426102921daa9b399a 100644 (file)
@@ -692,7 +692,6 @@ static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli);
 static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
 static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
 static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
-static void GetOldestRestartPointFileName(char *fname);
 
 static void WALInsertLockAcquire(void);
 static void WALInsertLockAcquireExclusive(void);
@@ -4890,12 +4889,10 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
         * Execute the recovery_end_command, if any.
         */
        if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
-       {
-               char            lastRestartPointFname[MAXFNAMELEN];
-
-               GetOldestRestartPointFileName(lastRestartPointFname);
-               shell_recovery_end(lastRestartPointFname);
-       }
+               ExecuteRecoveryCommand(recoveryEndCommand,
+                                                          "recovery_end_command",
+                                                          true,
+                                                          WAIT_EVENT_RECOVERY_END_COMMAND);
 
        /*
         * We switched to a new timeline. Clean up segments on the old timeline.
@@ -7312,12 +7309,10 @@ CreateRestartPoint(int flags)
         * Finally, execute archive_cleanup_command, if any.
         */
        if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
-       {
-               char            lastRestartPointFname[MAXFNAMELEN];
-
-               GetOldestRestartPointFileName(lastRestartPointFname);
-               shell_archive_cleanup(lastRestartPointFname);
-       }
+               ExecuteRecoveryCommand(archiveCleanupCommand,
+                                                          "archive_cleanup_command",
+                                                          false,
+                                                          WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
 
        return true;
 }
@@ -8894,22 +8889,6 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
        LWLockRelease(ControlFileLock);
 }
 
-/*
- * Returns the WAL file name for the last checkpoint or restartpoint.  This is
- * the oldest WAL file that we still need if we have to restart recovery.
- */
-static void
-GetOldestRestartPointFileName(char *fname)
-{
-       XLogRecPtr      restartRedoPtr;
-       TimeLineID      restartTli;
-       XLogSegNo       restartSegNo;
-
-       GetOldestRestartPoint(&restartRedoPtr, &restartTli);
-       XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
-       XLogFileName(fname, restartTli, restartSegNo, wal_segment_size);
-}
-
 /* Thin wrapper around ShutdownWalRcv(). */
 void
 XLogShutdownWalRcv(void)
index 4b89addf9762d2d9b39de80909e297f4a6a8c572..fcc87ff44fd26f02a43fea0d98d48ca3c084aa07 100644 (file)
@@ -22,6 +22,8 @@
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
 #include "access/xlogarchive.h"
+#include "common/archive.h"
+#include "common/percentrepl.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/startup.h"
@@ -55,8 +57,9 @@ RestoreArchivedFile(char *path, const char *xlogfname,
                                        bool cleanupEnabled)
 {
        char            xlogpath[MAXPGPATH];