From e57cd7f0a1622138d5489477e0625741e036e940 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 12 Apr 2010 09:52:29 +0000 Subject: [PATCH] Change the logic to decide when to delete old WAL segments, so that it doesn't take into account how far the WAL senders are. This way a hung WAL sender doesn't prevent old WAL segments from being recycled/removed in the primary, ultimately causing the disk to fill up. Instead add standby_keep_segments setting to control how many old WAL segments are kept in the primary. This also makes it more reliable to use streaming replication without WAL archiving, assuming that you set standby_keep_segments high enough. --- doc/src/sgml/config.sgml | 30 ++++++- doc/src/sgml/high-availability.sgml | 9 +- src/backend/access/transam/xlog.c | 90 ++++++++++++++----- src/backend/replication/walsender.c | 62 +++++++++++-- src/backend/utils/misc/guc.c | 11 ++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 4 +- 7 files changed, 174 insertions(+), 33 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 81b0ba3445..0ca5e402e0 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -1823,6 +1823,34 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows + + + standby_keep_segments (integer) + + standby_keep_segments configuration parameter + + + + Specifies the number of log file segments kept in pg_xlog + directory, in case a standby server needs to fetch them via streaming + replication. Each segment is normally 16 megabytes. If a standby + server connected to the primary falls behind more than + standby_keep_segments segments, the primary might remove + a WAL segment still needed by the standby and the replication + connection will be terminated. 
+ + This sets only the minimum number of segments retained for standby + purposes; the system might need to retain more segments for WAL + archival or to recover from a checkpoint. If standby_keep_segments + is zero (the default), the system doesn't keep any extra segments + for standby purposes, and the number of old WAL segments available + for standbys is determined based only on the location of the previous + checkpoint and status of WAL archival. + This parameter can only be set in the postgresql.conf + file or on the server command line. + + + diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index 13b783bc86..cff0339b52 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -1,4 +1,4 @@ - + High Availability, Load Balancing, and Replication @@ -732,7 +732,12 @@ trigger_file = '/path/to/trigger_file' Streaming replication relies on file-based continuous archiving for making the base backup and for allowing the standby to catch up if it is disconnected from the primary for long enough for the primary to - delete old WAL files still required by the standby. + delete old WAL files still required by the standby. It is possible + to use streaming replication without WAL archiving, but if a standby + falls behind too much, the primary will delete old WAL files still + needed by the standby, and the standby will have to be manually restored + from a base backup. You can control how long the primary retains old WAL + segments using the standby_keep_segments setting. 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 12392f8cfc..c5b7f7a98c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.391 2010/04/07 10:58:49 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.392 2010/04/12 09:52:29 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -66,6 +66,7 @@ /* User-settable parameters */ int CheckPointSegments = 3; +int StandbySegments = 0; int XLOGbuffers = 8; int XLogArchiveTimeout = 0; bool XLogArchiveMode = false; @@ -356,6 +357,8 @@ typedef struct XLogCtlData uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ TransactionId ckptXid; XLogRecPtr asyncCommitLSN; /* LSN of newest async commit */ + uint32 lastRemovedLog; /* latest removed/recycled XLOG segment */ + uint32 lastRemovedSeg; /* Protected by WALWriteLock: */ XLogCtlWrite Write; @@ -3149,6 +3152,22 @@ PreallocXlogFiles(XLogRecPtr endptr) } } +/* + * Get the log/seg of the latest removed or recycled WAL segment. + * Sets *log and *seg to 0 if no WAL segments have been removed since startup. 
+ */ +void +XLogGetLastRemoved(uint32 *log, uint32 *seg) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + SpinLockAcquire(&xlogctl->info_lck); + *log = xlogctl->lastRemovedLog; + *seg = xlogctl->lastRemovedSeg; + SpinLockRelease(&xlogctl->info_lck); +} + /* * Recycle or remove all log files older or equal to passed log/seg# * @@ -3170,6 +3189,20 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr) char newpath[MAXPGPATH]; #endif struct stat statbuf; + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + /* Update the last removed location in shared memory first */ + SpinLockAcquire(&xlogctl->info_lck); + if (log > xlogctl->lastRemovedLog || + (log == xlogctl->lastRemovedLog && seg > xlogctl->lastRemovedSeg)) + { + xlogctl->lastRemovedLog = log; + xlogctl->lastRemovedSeg = seg; + } + SpinLockRelease(&xlogctl->info_lck); + + elog(DEBUG1, "removing WAL segments older than %X/%X", log, seg); /* * Initialize info about where to try to recycle to. We allow recycling @@ -7172,36 +7205,51 @@ CreateCheckPoint(int flags) smgrpostckpt(); /* - * If there's connected standby servers doing XLOG streaming, don't delete - * XLOG files that have not been streamed to all of them yet. This does - * nothing to prevent them from being deleted when the standby is - * disconnected (e.g because of network problems), but at least it avoids - * an open replication connection from failing because of that. + * Delete old log files (those no longer needed even for previous + * checkpoint or the standbys in XLOG streaming). 
*/ - if ((_logId || _logSeg) && max_wal_senders > 0) + if (_logId || _logSeg) { - XLogRecPtr oldest; - uint32 log; - uint32 seg; - - oldest = GetOldestWALSendPointer(); - if (oldest.xlogid != 0 || oldest.xrecoff != 0) + /* + * Calculate the last segment that we need to retain because of + * standby_keep_segments, by subtracting StandbySegments from the + * new checkpoint location. + */ + if (StandbySegments > 0) { - XLByteToSeg(oldest, log, seg); + uint32 log; + uint32 seg; + int d_log; + int d_seg; + + XLByteToSeg(recptr, log, seg); + + d_seg = StandbySegments % XLogSegsPerFile; + d_log = StandbySegments / XLogSegsPerFile; + if (seg < d_seg) + { + d_log += 1; + seg = seg - d_seg + XLogSegsPerFile; + } + else + seg = seg - d_seg; + /* avoid underflow, don't go below (0,1) */ + if (log < d_log || (log == d_log && seg == 0)) + { + log = 0; + seg = 1; + } + else + log = log - d_log; + + /* don't delete WAL segments newer than the calculated segment */ if (log < _logId || (log == _logId && seg < _logSeg)) { _logId = log; _logSeg = seg; } } - } - /* - * Delete old log files (those no longer needed even for previous - * checkpoint or the standbys in XLOG streaming). 
- */ - if (_logId || _logSeg) - { PrevLogSeg(_logId, _logSeg); RemoveOldXlogFiles(_logId, _logSeg, recptr); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e04e5ba65c..aa8fbc1a40 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -30,7 +30,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.14 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.15 2010/04/12 09:52:29 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -508,6 +508,10 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) { char path[MAXPGPATH]; uint32 startoff; + uint32 lastRemovedLog; + uint32 lastRemovedSeg; + uint32 log; + uint32 seg; while (nbytes > 0) { @@ -527,10 +531,27 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); if (sendFile < 0) - ereport(FATAL, /* XXX: Why FATAL? */ - (errcode_for_file_access(), - errmsg("could not open file \"%s\" (log file %u, segment %u): %m", - path, sendId, sendSeg))); + { + /* + * If the file is not found, assume it's because the + * standby asked for a too old WAL segment that has already + * been removed or recycled. 
+ */ + if (errno == ENOENT) + { + char filename[MAXFNAMELEN]; + XLogFileName(filename, ThisTimeLineID, sendId, sendSeg); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + filename))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" (log file %u, segment %u): %m", + path, sendId, sendSeg))); + } sendOff = 0; } @@ -538,7 +559,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) if (sendOff != startoff) { if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0) - ereport(FATAL, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", sendId, sendSeg, startoff))); @@ -553,7 +574,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) readbytes = read(sendFile, buf, segbytes); if (readbytes <= 0) - ereport(FATAL, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u, " "length %lu: %m", @@ -566,6 +587,26 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) nbytes -= readbytes; buf += readbytes; } + + /* + * After reading into the buffer, check that what we read was valid. + * We do this after reading, because even though the segment was present + * when we opened it, it might get recycled or removed while we read it. + * The read() succeeds in that case, but the data we tried to read might + * already have been overwritten with new WAL records. + */ + XLogGetLastRemoved(&lastRemovedLog, &lastRemovedSeg); + XLByteToPrevSeg(recptr, log, seg); + if (log < lastRemovedLog || + (log == lastRemovedLog && seg <= lastRemovedSeg)) + { + char filename[MAXFNAMELEN]; + XLogFileName(filename, ThisTimeLineID, log, seg); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + filename))); + } } /* @@ -801,6 +842,12 @@ WalSndShmemInit(void) } } +/* + * This isn't currently used for anything. 
Monitoring tools might be + * interested in the future, and we'll need something like this in the + * future for synchronous replication. + */ +#ifdef NOT_USED /* * Returns the oldest Send position among walsenders. Or InvalidXLogRecPtr * if none. @@ -834,3 +881,4 @@ GetOldestWALSendPointer(void) } return oldest; } +#endif diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 5f8cc49489..9d72a0e573 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.546 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.547 2010/04/12 09:52:29 heikki Exp $ * *-------------------------------------------------------------------- */ @@ -1647,6 +1647,15 @@ static struct config_int ConfigureNamesInt[] = 0, 0, 60, NULL, NULL }, + { + {"standby_keep_segments", PGC_SIGHUP, WAL_CHECKPOINTS, + gettext_noop("Sets the number of WAL files held for standby servers"), + NULL + }, + &StandbySegments, + 0, 0, INT_MAX, NULL, NULL + }, + { {"checkpoint_segments", PGC_SIGHUP, WAL_CHECKPOINTS, gettext_noop("Sets the maximum distance in log segments between automatic WAL checkpoints."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 02f1df0103..48c09d1467 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -193,6 +193,7 @@ #max_wal_senders = 0 # max number of walsender processes #wal_sender_delay = 200ms # 1-10000 milliseconds +#standby_keep_segments = 0 # in logfile segments, 16MB each; 0 disables #------------------------------------------------------------------------------ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 9a66e9134d..de7406a808 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions 
Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.105 2010/04/01 00:43:29 rhaas Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.106 2010/04/12 09:52:29 heikki Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -187,6 +187,7 @@ extern XLogRecPtr XactLastRecEnd; /* these variables are GUC parameters related to XLOG */ extern int CheckPointSegments; +extern int StandbySegments; extern int XLOGbuffers; extern bool XLogArchiveMode; extern char *XLogArchiveCommand; @@ -267,6 +268,7 @@ extern int XLogFileInit(uint32 log, uint32 seg, extern int XLogFileOpen(uint32 log, uint32 seg); +extern void XLogGetLastRemoved(uint32 *log, uint32 *seg); extern void XLogSetAsyncCommitLSN(XLogRecPtr record); extern void RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup); -- 2.39.5