From 5bf2280f7ef62315b4edaecd81ec64b96f1464fb Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 5 Feb 2009 13:21:20 +0200 Subject: [PATCH] Bring back startup checkpoints. Plus some other small changes --- src/backend/access/transam/xlog.c | 200 ++++++++---------------------- src/backend/postmaster/bgwriter.c | 13 -- src/include/access/xlog.h | 11 +- src/include/catalog/pg_control.h | 2 +- 4 files changed, 55 insertions(+), 171 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 961bcf3c43..0f647de5aa 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -422,7 +422,7 @@ static XLogRecPtr EndRecPtr; /* end+1 of last record read. Also in shared mem */ static XLogRecord *nextRecord = NULL; static TimeLineID lastPageTLI = 0; static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */ -static bool updateMinRecoveryPoint = true; +static bool updateMinRecoveryPoint = true; static bool InRedo = false; @@ -440,7 +440,6 @@ static void XLogArchiveCleanup(const char *xlog); static void readRecoveryCommandFile(void); static void exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg); -static void exitRecovery(void); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags); @@ -526,10 +525,9 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) bool updrqst; bool doPageWrites; bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH); - bool isRecoveryEnd = (rmid == RM_XLOG_ID && info == XLOG_RECOVERY_END); /* cross-check on whether we should be here or not */ - if (IsRecoveryProcessingMode() && !isRecoveryEnd) + if (IsRecoveryProcessingMode()) elog(FATAL, "cannot make new WAL entries during recovery"); /* info's high bits are reserved for use by me */ @@ -1826,7 +1824,10 @@ XLogFlush(XLogRecPtr record) XLogRecPtr WriteRqstPtr; XLogwrtRqst WriteRqst; - /* During REDO, we don't try to flush the WAL, but update minRecoveryPoint instead */ + /* + * During REDO, we don't try to flush the WAL, but update minRecoveryPoint + * instead. + */ if (IsRecoveryProcessingMode()) { UpdateMinRecoveryPoint(record); @@ -1930,7 +1931,7 @@ XLogFlush(XLogRecPtr record) * and so we will not force a restart for a bad LSN on a data page. */ if (XLByteLT(LogwrtResult.Flush, record)) - elog(ERROR, + elog(InRecovery ? WARNING : ERROR, "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X", record.xlogid, record.xrecoff, LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff); @@ -2508,6 +2509,7 @@ XLogFileRead(uint32 log, uint32 seg, int emode) snprintf(activitymsg, sizeof(activitymsg), "recovering %s", xlogfname); set_ps_display(activitymsg, false); + return fd; } if (errno != ENOENT) /* unexpected failure? */ @@ -2788,7 +2790,7 @@ RestoreArchivedFile(char *path, const char *xlogfname, */ if (shutdown_requested && InRedo) { - /* XXX: We should update minRecoveryPoint to the exact value here */ + /* XXX: Is EndRecPtr always the right value? */ UpdateMinRecoveryPoint(EndRecPtr); proc_exit(0); } @@ -4835,13 +4837,15 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg) unlink(recoveryPath); /* ignore any error */ /* - * As of 8.4 we no longer rename the recovery.conf file out of the - * way until after we have performed a full checkpoint. This ensures - * that any crash between now and the end of the checkpoint does not - * attempt to restart from a WAL file that is no longer available to us. - * As soon as we remove recovery.conf we lose our recovery_command and - * cannot reaccess WAL files from the archive. + * Rename the config file out of the way, so that we don't accidentally + * re-enter archive recovery mode in a subsequent crash. */ + unlink(RECOVERY_COMMAND_DONE); + if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE))); ereport(LOG, (errmsg("archive recovery complete"))); @@ -4977,7 +4981,6 @@ StartupXLOG(void) bool wasShutdown; bool reachedStopPoint = false; bool reachedMinRecoveryPoint = false; - bool performedRecovery = false; bool haveBackupLabel = false; XLogRecPtr RecPtr, LastRec, @@ -5331,24 +5334,14 @@ StartupXLOG(void) { /* * We were requested to exit without finishing recovery. - * - * XXX: We should update minRecoveryPoint to the exact - * value here. */ - UpdateMinRecoveryPoint(EndRecPtr); + UpdateMinRecoveryPoint(ReadRecPtr); proc_exit(0); } /* * Have we reached our safe starting point? If so, we can - * signal postmaster to enter consistent recovery mode. - * XXX - * There are two points in the log we must pass. The first is - * the minRecoveryPoint, which is the LSN at the time the - * base backup was taken that we are about to rollfoward from. - * If recovery has ever crashed or was stopped there is - * another point also: minSafeStartPoint, which is the - * latest LSN that recovery could have reached prior to crash. + * tell postmaster that the database is consistent now. */ if (!reachedMinRecoveryPoint && XLByteLE(minRecoveryPoint, EndRecPtr)) @@ -5437,7 +5430,7 @@ StartupXLOG(void) * Complain if we did not roll forward far enough to render the backup * dump consistent. */ - if (InRecovery && !reachedMinRecoveryPoint) + if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint)) { if (reachedStopPoint) /* stopped because of stop request */ ereport(FATAL, @@ -5539,6 +5532,12 @@ StartupXLOG(void) /* Pre-scan prepared transactions to find out the range of XIDs present */ oldestActiveXID = PrescanPreparedTransactions(); + /* + * Allow writing WAL for us. But not for other backends! That's done + * after writing the shutdown checkpoint and finishing recovery. + */ + LocalRecoveryProcessingMode = false; + if (InRecovery) { int rmid; @@ -5559,14 +5558,30 @@ StartupXLOG(void) XLogCheckInvalidPages(); /* - * Finally exit recovery and mark that in WAL. Pre-8.4 we wrote - * a shutdown checkpoint here, but we ask bgwriter to do that now. + * Perform a checkpoint to update all our recovery activity to disk. + * + * Note that we write a shutdown checkpoint rather than an on-line + * one. This is not particularly critical, but since we may be + * assigning a new TLI, using a shutdown checkpoint allows us to have + * the rule that TLI only changes in shutdown checkpoints, which + * allows some extra error checking in xlog_redo. */ - exitRecovery(); - - performedRecovery = true; + CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); } + /* + * Preallocate additional log files, if wanted. + */ + PreallocXlogFiles(EndOfLog); + + InRecovery = false; + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->state = DB_IN_PRODUCTION; + ControlFile->time = (pg_time_t) time(NULL); + UpdateControlFile(); + LWLockRelease(ControlFileLock); + /* start the archive_timeout timer running */ XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL); @@ -5605,38 +5620,9 @@ StartupXLOG(void) } /* - * If we had to replay any WAL records, request a checkpoint. This isn't - * strictly necessary: if we crash now, the recovery will simply restart - * from the same point as this time (or from the last restartpoint). The - * control file is left in DB_IN_*_RECOVERY state; the first checkpoint - * will change that to DB_IN_PRODUCTION. + * All done. Allow others to write WAL. */ - if (performedRecovery) - { - /* - * Okay, we can come up now. Allow others to write WAL. - */ - XLogCtl->SharedRecoveryProcessingMode = false; - - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_IMMEDIATE | - CHECKPOINT_STARTUP); - } - else - { - /* - * No recovery, so let's just get on with it. - */ - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - ControlFile->state = DB_IN_PRODUCTION; - ControlFile->time = (pg_time_t) time(NULL); - UpdateControlFile(); - LWLockRelease(ControlFileLock); - - /* - * Okay, we're officially UP. - */ - XLogCtl->SharedRecoveryProcessingMode = false; - } + XLogCtl->SharedRecoveryProcessingMode = false; } /* @@ -5946,7 +5932,6 @@ LogCheckpointStart(int flags, bool restartpoint) elog(LOG, msg, (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "", - (flags & CHECKPOINT_STARTUP) ? " startup" : "", (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "", (flags & CHECKPOINT_FORCE) ? " force" : "", (flags & CHECKPOINT_WAIT) ? " wait" : "", @@ -6030,7 +6015,6 @@ CreateCheckPoint(int flags) uint32 _logSeg; TransactionId *inCommitXids; int nInCommit; - bool leavingArchiveRecovery; /* shouldn't happen */ if (IsRecoveryProcessingMode()) @@ -6044,13 +6028,6 @@ CreateCheckPoint(int flags) */ LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); - /* - * Find out if this is the first checkpoint after archive recovery. - */ - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - leavingArchiveRecovery = (ControlFile->state == DB_IN_ARCHIVE_RECOVERY); - LWLockRelease(ControlFileLock); - /* * Prepare to accumulate statistics. * @@ -6284,10 +6261,6 @@ CreateCheckPoint(int flags) * if this is the first checkpoint after recovery. */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - if (shutdown) - ControlFile->state = DB_SHUTDOWNED; - else - ControlFile->state = DB_IN_PRODUCTION; ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->checkPoint = ProcLastRecPtr; ControlFile->checkPointCopy = checkPoint; @@ -6295,21 +6268,6 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - if (leavingArchiveRecovery) - { - /* - * Rename the config file out of the way, so that we don't accidentally - * re-enter archive recovery mode in a subsequent crash. Prior to - * 8.4 this step was performed at end of exitArchiveRecovery(). - */ - unlink(RECOVERY_COMMAND_DONE); - if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not rename file \"%s\" to \"%s\": %m", - RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE))); - } - /* Update shared-memory copy of checkpoint XID/epoch */ { /* use volatile pointer to prevent code rearrangement */ @@ -6588,39 +6546,6 @@ RequestXLogSwitch(void) return RecPtr; } -/* - * exitRecovery() - * - * Exit recovery state and write a XLOG_RECOVERY_END record. This is the - * only record type that can record a change of timelineID. We assume - * caller has already set ThisTimeLineID, if appropriate. - */ -static void -exitRecovery(void) -{ - XLogRecData rdata; - - rdata.buffer = InvalidBuffer; - rdata.data = (char *) (&ThisTimeLineID); - rdata.len = sizeof(TimeLineID); - rdata.next = NULL; - - /* - * This is the only type of WAL message that can be inserted during - * recovery. This ensures that we don't allow others to get access - * until after we have changed state. - */ - (void) XLogInsert(RM_XLOG_ID, XLOG_RECOVERY_END, &rdata); - - /* - * We don't XLogFlush() here otherwise we'll end up zeroing the WAL - * file ourselves. So just let bgwriter's forthcoming checkpoint do - * that for us. - */ - - InRecovery = false; -} - /* * XLOG resource manager's routines * @@ -6669,33 +6594,6 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) RecoveryRestartPoint(&checkPoint); } - else if (info == XLOG_RECOVERY_END) - { - TimeLineID tli; - - memcpy(&tli, XLogRecGetData(record), sizeof(TimeLineID)); - - /* - * TLI may change when recovery ends, but it shouldn't decrease. - * - * This is the only WAL record that can tell us to change timelineID - * while we process WAL records. - * - * We can *choose* to stop recovery at any point, generating a - * new timelineID which is recorded using this record type. - */ - if (tli != ThisTimeLineID) - { - if (tli < ThisTimeLineID || - !list_member_int(expectedTLIs, - (int) tli)) - ereport(PANIC, - (errmsg("unexpected timeline ID %u (after %u) at recovery end record", - tli, ThisTimeLineID))); - /* Following WAL records should be run with new TLI */ - ThisTimeLineID = tli; - } - } else if (info == XLOG_CHECKPOINT_ONLINE) { CheckPoint checkPoint; diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 4c8c54c587..d38e0c6452 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -889,7 +889,6 @@ BgWriterShmemInit(void) * * flags is a bitwise OR of the following: * CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown. - * CHECKPOINT_IS_STARTUP: checkpoint is for database startup. * CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP, * ignoring checkpoint_completion_target parameter. * CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured @@ -956,18 +955,6 @@ RequestCheckpoint(int flags) { if (BgWriterShmem->bgwriter_pid == 0) { - /* - * The only difference between a startup checkpoint and a normal - * online checkpoint is that it's quite normal for the bgwriter - * to not be up yet when the startup checkpoint is requested. - * (it might be, though). That's ok, background writer will - * perform the checkpoint as soon as it starts up. - */ - if (flags & CHECKPOINT_STARTUP) - { - Assert(!(flags & CHECKPOINT_WAIT)); - break; - } if (ntries >= 20) /* max wait 2.0 sec */ { elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG, diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index b97a6afbf0..2a9ed7078e 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -168,14 +168,13 @@ extern bool XLOG_DEBUG; /* These directly affect the behavior of CreateCheckPoint and subsidiaries */ #define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */ -#define CHECKPOINT_IS_STARTUP 0x0002 /* Startup checkpoint */ -#define CHECKPOINT_IMMEDIATE 0x0003 /* Do it without delays */ -#define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */ +#define CHECKPOINT_IMMEDIATE 0x0002 /* Do it without delays */ +#define CHECKPOINT_FORCE 0x0004 /* Force even if no activity */ /* These are important to RequestCheckpoint */ -#define CHECKPOINT_WAIT 0x0010 /* Wait for completion */ +#define CHECKPOINT_WAIT 0x0008 /* Wait for completion */ /* These indicate the cause of a checkpoint request */ -#define CHECKPOINT_CAUSE_XLOG 0x0020 /* XLOG consumption */ -#define CHECKPOINT_CAUSE_TIME 0x0040 /* Elapsed time */ +#define CHECKPOINT_CAUSE_XLOG 0x0010 /* XLOG consumption */ +#define CHECKPOINT_CAUSE_TIME 0x0020 /* Elapsed time */ /* Checkpoint statistics */ typedef struct CheckpointStatsData diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 275fc1dddf..400f32c749 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -46,7 +46,7 @@ typedef struct CheckPoint #define XLOG_NOOP 0x20 #define XLOG_NEXTOID 0x30 #define XLOG_SWITCH 0x40 -#define XLOG_RECOVERY_END 0x50 + /* System status indicator */ typedef enum DBState -- 2.39.5