From 6aad888295d1a6bebe53f2e2c2850bf3f8dc0d58 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 28 Jan 2009 10:04:35 +0200 Subject: [PATCH] Comment changes. Inline GetRedoLocationForCheckpoint into CreateCheckPoint again. --- src/backend/access/transam/xlog.c | 133 ++++++++++------------------ src/backend/postmaster/bgwriter.c | 30 ++++--- src/backend/postmaster/postmaster.c | 27 +++--- 3 files changed, 80 insertions(+), 110 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b7a1e3504b..226e96953d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -119,7 +119,15 @@ CheckpointStatsData CheckpointStats; */ TimeLineID ThisTimeLineID = 0; -/* Are we doing recovery from XLOG? */ +/* + * Are we doing recovery from XLOG? + * + * This is only ever true in the startup process, when it's replaying WAL. + * It's used in functions that need to act differently when called from a + * redo function (e.g. skip WAL logging). To check whether the system is in + * recovery regardless of what process you're running in, use + * IsRecoveryProcessingMode(). + */ bool InRecovery = false; /* Are we recovering using offline XLOG archives? */ @@ -251,30 +259,10 @@ static XLogRecPtr RedoRecPtr; * ControlFileLock: must be held to read/update control file or create * new log file. * - * CheckpointLock: must be held to do a checkpoint or restartpoint, ensuring - * we get just one of those at any time. In 8.4+ recovery, both startup and - * bgwriter processes may take restartpoints, so this locking must be strict - * to ensure there are no mistakes. + * CheckpointLock: must be held to do a checkpoint (ensures only one + * checkpointer at a time; currently, with all checkpoints done by the + * bgwriter, this is just pro forma). * - * In 8.4 we progress through a number of states at startup. Initially, the - * postmaster is in PM_STARTUP state and spawns the Startup process.
We then - * progress until the database is in a consistent state, then if we are in - * InArchiveRecovery we go into PM_RECOVERY state. The bgwriter then starts - * up and takes over responsibility for performing restartpoints. We then - * progress until the end of recovery when we enter PM_RUN state upon - * termination of the Startup process. In summary: - * - * PM_STARTUP state: Startup process performs restartpoints - * PM_RECOVERY state: bgwriter process performs restartpoints - * PM_RUN state: bgwriter process performs checkpoints - * - * These transitions are fairly delicate, with many things that need to - * happen at the same time in order to change state successfully throughout - * the system. Changing PM_STARTUP to PM_RECOVERY only occurs when we can - * prove the databases are in a consistent state. Changing from PM_RECOVERY - * to PM_RUN happens whenever recovery ends, which could be forced upon us - * externally or it can occur becasue of damage or termination of the WAL - * sequence. *---------- */ @@ -344,15 +332,7 @@ typedef struct XLogCtlData /* * SharedRecoveryProcessingMode indicates if we're still in crash or - * archive recovery. You should use IsRecoveryProcessingMode() instead - * of peeking at this variable directly, because it uses a cached value - * after we exit recovery. - * - * We also retain a local state variable InRecovery. InRecovery=true - * means the code is being executed by Startup process and therefore - * always during Recovery Processing Mode. This allows us to identify - * code executed *during* Recovery Processing Mode but not necessarily - * by Startup process itself. + * archive recovery. 
It's checked by IsRecoveryProcessingMode(). */ bool SharedRecoveryProcessingMode; @@ -455,7 +435,6 @@ static void exitArchiveRecovery(TimeLineID endTLI, static void exitRecovery(void); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags); -static XLogRecPtr GetRedoLocationForCheckpoint(void); static bool XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites, XLogRecPtr *lsn, BkpBlock *bkpb); @@ -1789,6 +1768,7 @@ XLogFlush(XLogRecPtr record) XLogRecPtr WriteRqstPtr; XLogwrtRqst WriteRqst; + /* Disabled during REDO */ if (IsRecoveryProcessingMode()) return; @@ -5939,6 +5919,7 @@ CreateCheckPoint(int flags) XLogRecPtr recptr; XLogCtlInsert *Insert = &XLogCtl->Insert; XLogRecData rdata; + uint32 freespace; uint32 _logId; uint32 _logSeg; TransactionId *inCommitXids; @@ -5946,8 +5927,9 @@ CreateCheckPoint(int flags) /* * Acquire CheckpointLock to ensure only one checkpoint happens at a time. - * That shouldn't be happening, but checkpoints are an important aspect - * of our resilience, so we take no chances. + * (This is just pro forma, since in the present system structure there is + * only one process that is allowed to issue checkpoints at any given + * time.) */ LWLockAcquire(CheckpointLock, LW_EXCLUSIVE); @@ -6036,7 +6018,34 @@ CreateCheckPoint(int flags) * the buffer flush work. Those XLOG records are logically after the * checkpoint, even though physically before it. Got that? */ - checkPoint.redo = GetRedoLocationForCheckpoint(); + freespace = INSERT_FREESPACE(Insert); + if (freespace < SizeOfXLogRecord) + { + (void) AdvanceXLInsertBuffer(false); + /* OK to ignore update return flag, since we will do flush anyway */ + freespace = INSERT_FREESPACE(Insert); + } + INSERT_RECPTR(checkPoint.redo, Insert, Insert->curridx); + + /* + * Here we update the shared RedoRecPtr for future XLogInsert calls; this + * must be done while holding the insert lock AND the info_lck.
+ * + * Note: if we fail to complete the checkpoint, RedoRecPtr will be left + * pointing past where it really needs to point. This is okay; the only + * consequence is that XLogInsert might back up whole buffers that it + * didn't really need to. We can't postpone advancing RedoRecPtr because + * XLogInserts that happen while we are dumping buffers must assume that + * their buffer changes are not included in the checkpoint. + */ + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + RedoRecPtr = xlogctl->Insert.RedoRecPtr = checkPoint.redo; + SpinLockRelease(&xlogctl->info_lck); + } /* * Now we can release WAL insert lock, allowing other xacts to proceed @@ -6161,18 +6170,15 @@ CreateCheckPoint(int flags) * that this is executed by bgwriter after the death of Startup process. */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - if (shutdown) ControlFile->state = DB_SHUTDOWNED; else ControlFile->state = DB_IN_PRODUCTION; - ControlFile->prevCheckPoint = ControlFile->checkPoint; ControlFile->checkPoint = ProcLastRecPtr; ControlFile->checkPointCopy = checkPoint; ControlFile->time = (pg_time_t) time(NULL); UpdateControlFile(); - LWLockRelease(ControlFileLock); /* Update shared-memory copy of checkpoint XID/epoch */ @@ -6235,51 +6241,6 @@ CreateCheckPoint(int flags) LWLockRelease(CheckpointLock); } -/* - * GetRedoLocationForCheckpoint() - * - * When !IsRecoveryProcessingMode() this must be called while holding - * WALInsertLock. 
- */ -static XLogRecPtr -GetRedoLocationForCheckpoint() -{ - XLogCtlInsert *Insert = &XLogCtl->Insert; - uint32 freespace; - XLogRecPtr redo; - - freespace = INSERT_FREESPACE(Insert); - if (freespace < SizeOfXLogRecord) - { - (void) AdvanceXLInsertBuffer(false); - /* OK to ignore update return flag, since we will do flush anyway */ - freespace = INSERT_FREESPACE(Insert); - } - INSERT_RECPTR(redo, Insert, Insert->curridx); - - /* - * Here we update the shared RedoRecPtr for future XLogInsert calls; this - * must be done while holding the insert lock AND the info_lck. - * - * Note: if we fail to complete the checkpoint, RedoRecPtr will be left - * pointing past where it really needs to point. This is okay; the only - * consequence is that XLogInsert might back up whole buffers that it - * didn't really need to. We can't postpone advancing RedoRecPtr because - * XLogInserts that happen while we are dumping buffers must assume that - * their buffer changes are not included in the checkpoint. - */ - { - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; - - SpinLockAcquire(&xlogctl->info_lck); - RedoRecPtr = xlogctl->Insert.RedoRecPtr = redo; - SpinLockRelease(&xlogctl->info_lck); - } - - return redo; -} - /* * Flush all data in shared memory to disk, and fsync * @@ -6551,7 +6512,7 @@ exitRecovery(void) } /* - * XLOG resource manager's routines. + * XLOG resource manager's routines * * Definitions of message info are in include/catalog/pg_control.h, * though not all messages relate to control file processing. 
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 8cbb7a3744..f9ab290823 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -362,10 +362,19 @@ BackgroundWriterMain(void) BgWriterRecoveryMode = IsRecoveryProcessingMode(); if (BgWriterRecoveryMode) - elog(DEBUG1, "bgwriter starting during recovery, pid = %u", - BgWriterShmem->bgwriter_pid); + elog(DEBUG1, "bgwriter starting during recovery"); + else + InitXLOGAccess(); - /* If someone requested a checkpoint before we started up, process that */ + /* + * If someone requested a checkpoint before we started up, process that. + * + * This check exists primarily for crash recovery: after the startup + * process is finished with WAL replay, it will request a checkpoint, but + * the background writer might not have started yet. This check will + * actually not notice a checkpoint that's been requested without any + * flags, but it's good enough for the startup checkpoint. + */ SpinLockAcquire(&bgs->ckpt_lck); if (bgs->ckpt_flags) checkpoint_requested = true; @@ -417,6 +426,13 @@ BackgroundWriterMain(void) /* Normal exit from the bgwriter is here */ proc_exit(0); /* done */ } + if (BgWriterRecoveryMode && !IsRecoveryProcessingMode()) + { + elog(DEBUG1, "bgwriter changing from recovery to normal mode"); + + InitXLOGAccess(); + BgWriterRecoveryMode = false; + } /* * Force a checkpoint if too much time has elapsed since the last one. @@ -434,14 +450,6 @@ BackgroundWriterMain(void) flags |= CHECKPOINT_CAUSE_TIME; } - if (BgWriterRecoveryMode && !IsRecoveryProcessingMode()) - { - elog(DEBUG2, "bgwriter changing from recovery to normal mode"); - - InitXLOGAccess(); - BgWriterRecoveryMode = false; - } - /* * Do a checkpoint if requested, otherwise do one cycle of * dirty-buffer writing. 
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0c0e5e4507..221c9b2aac 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -228,17 +228,18 @@ static bool FatalError = false; /* T if recovering from backend crash */ /* * We use a simple state machine to control startup, shutdown, and - * crash recovery (which is rather like shutdown followed by startup). + * recovery. * - * Recovery is split into two phases: crash recovery and archive recovery. - * The startup process begins with crash recovery, replaying WAL until - * a self-consistent database state is reached. At that point, it signals - * postmaster, and we switch to archive recovery phase. The background - * writer is launched, and we can start accepting connections to perform - * read-only queries, while the startup process continues applying WAL. - * When the startup process exits, we switch to PM_RUN state. The startup - * process can also skip the archive recovery altogether, as it will during - * normal startup when there's no recovery to be done, for example. + * Recovery is split into two phases: crash recovery and consistent (archive) + * recovery. The startup process begins with crash recovery, replaying WAL + * until a self-consistent database state is reached. At that point, it + * signals postmaster, and we switch to consistent recovery phase. The + * background writer is launched, while the startup process continues + * applying WAL. We could start accepting connections to perform read-only + * queries at this point, if we had the infrastructure to do that. When the + * startup process exits, we switch to PM_RUN state. The startup process can + * also skip the consistent recovery altogether, as it will during normal + * startup when there's no recovery to be done, for example. * * Normal child backends can only be launched when we are in PM_RUN state. 
* (We also allow it in PM_WAIT_BACKUP state, but only for superusers.) @@ -264,7 +265,7 @@ typedef enum { PM_INIT, /* postmaster starting */ PM_STARTUP, /* waiting for startup subprocess */ - PM_RECOVERY, /* archive recovery mode */ + PM_RECOVERY, /* consistent recovery mode */ PM_RUN, /* normal "database is alive" state */ PM_WAIT_BACKUP, /* waiting for online backup mode to end */ PM_WAIT_BACKENDS, /* waiting for live backends to exit */ @@ -2169,8 +2170,8 @@ reaper(SIGNAL_ARGS) /* * Crank up the background writer, if we didn't do that already - * when we entered archive recovery phase. It doesn't matter if - * this fails, we'll just try again later. + * when we entered consistent recovery phase. It doesn't matter + * if this fails, we'll just try again later. */ if (BgWriterPID == 0) BgWriterPID = StartBackgroundWriter(); -- 2.39.5