Treat 2PC commit/abort the same as regular xacts in recovery.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jul 2014 07:33:15 +0000 (10:33 +0300)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 29 Jul 2014 08:58:09 +0000 (11:58 +0300)
There were several oversights in recovery code where COMMIT/ABORT PREPARED
records were ignored:

* pg_last_xact_replay_timestamp() (wasn't updated for 2PC commits)
* recovery_min_apply_delay (2PC commits were applied immediately)
* recovery_target_xid (recovery would not stop if the XID used 2PC)

The first of those was reported by Sergiy Zuban in bug #11032, analyzed by
Tom Lane and Andres Freund. The bug was always there, but was masked before
commit d19bd29f07aef9e508ff047d128a4046cc8bc1e2, because COMMIT PREPARED
always created an extra regular transaction that was WAL-logged.

Backpatch to all supported versions (older versions didn't have all the
features and therefore didn't have all of the above bugs).

src/backend/access/transam/xlog.c
src/include/access/xact.h

index da5ba51a1d0e4edfc85a6369f61aa50b4f950715..89b2fb66f8b1d19ddca928e79888d7dc46a55524 100644 (file)
@@ -5648,6 +5648,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
    bool        stopsHere;
    uint8       record_info;
    TimestampTz recordXtime;
+   TransactionId recordXid;
    char        recordRPName[MAXFNAMELEN];
 
    /* We only consider stopping at COMMIT, ABORT or RESTORE POINT records */
@@ -5660,6 +5661,15 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
        recordXactCommitData = (xl_xact_commit *) XLogRecGetData(record);
        recordXtime = recordXactCommitData->xact_time;
+       recordXid = record->xl_xid;
+   }
+   else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
+   {
+       xl_xact_commit_prepared *recordXactCommitData;
+
+       recordXactCommitData = (xl_xact_commit_prepared *) XLogRecGetData(record);
+       recordXtime = recordXactCommitData->crec.xact_time;
+       recordXid = recordXactCommitData->xid;
    }
    else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
    {
@@ -5667,6 +5677,15 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
        recordXactAbortData = (xl_xact_abort *) XLogRecGetData(record);
        recordXtime = recordXactAbortData->xact_time;
+       recordXid = record->xl_xid;
+   }
+   else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
+   {
+       xl_xact_abort_prepared *recordXactAbortData;
+
+       recordXactAbortData = (xl_xact_abort_prepared *) XLogRecGetData(record);
+       recordXtime = recordXactAbortData->arec.xact_time;
+       recordXid = recordXactAbortData->xid;
    }
    else if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
    {
@@ -5674,6 +5693,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
        recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
        recordXtime = recordRestorePointData->rp_time;
+       recordXid = InvalidTransactionId;
        strlcpy(recordRPName, recordRestorePointData->rp_name, MAXFNAMELEN);
    }
    else
@@ -5702,7 +5722,7 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
         * they complete. A higher numbered xid will complete before you about
         * 50% of the time...
         */
-       stopsHere = (record->xl_xid == recoveryTargetXid);
+       stopsHere = (recordXid == recoveryTargetXid);
        if (stopsHere)
            *includeThis = recoveryTargetInclusive;
    }
@@ -5737,11 +5757,12 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
 
    if (stopsHere)
    {
-       recoveryStopXid = record->xl_xid;
+       recoveryStopXid = recordXid;
        recoveryStopTime = recordXtime;
        recoveryStopAfter = *includeThis;
 
-       if (record_info == XLOG_XACT_COMMIT)
+       if (record_info == XLOG_XACT_COMMIT ||
+           record_info == XLOG_XACT_COMMIT_PREPARED)
        {
            if (recoveryStopAfter)
                ereport(LOG,
@@ -5754,7 +5775,8 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
                                recoveryStopXid,
                                timestamptz_to_str(recoveryStopTime))));
        }
-       else if (record_info == XLOG_XACT_ABORT)
+       else if (record_info == XLOG_XACT_ABORT ||
+                record_info == XLOG_XACT_ABORT_PREPARED)
        {
            if (recoveryStopAfter)
                ereport(LOG,
index cb440d41f1401f69698481e2c175eb903f879fed..b95c0b165125394a606a302ded1a5054a4ff89f4 100644 (file)
@@ -165,8 +165,7 @@ typedef struct xl_xact_abort
 /*
  * COMMIT_PREPARED and ABORT_PREPARED are identical to COMMIT/ABORT records
  * except that we have to store the XID of the prepared transaction explicitly
- * --- the XID in the record header will be for the transaction doing the
- * COMMIT PREPARED or ABORT PREPARED command.
+ * --- the XID in the record header will be invalid.
  */
 
 typedef struct xl_xact_commit_prepared