Change CRCs in WAL records from 64bit to 32bit for performance reasons.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Jun 2005 05:55:29 +0000 (05:55 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Jun 2005 05:55:29 +0000 (05:55 +0000)
Instead of a separate CRC on each backup block, include backup blocks
in their parent WAL record's CRC; this is important to ensure that the
backup block really goes with the WAL record, i.e., there was not a page
tear right at the start of the backup block.  Implement a simple form
of compression of backup blocks: drop any run of zeroes starting at
pd_lower, so as not to store the unused 'hole' that commonly exists in
PG heap and index pages.  Tweak PageRepairFragmentation and related
routines to ensure they keep the unused space zeroed, so that the above
compression method remains effective.  All per recent discussions.

src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtxlog.c
src/backend/access/transam/xlog.c
src/backend/storage/page/bufpage.c
src/backend/utils/hash/pg_crc.c
src/bin/pg_controldata/pg_controldata.c
src/bin/pg_resetxlog/pg_resetxlog.c
src/include/access/xlog.h
src/include/access/xlog_internal.h
src/include/catalog/pg_control.h
src/include/utils/pg_crc.h

index b206008afe348be38bfbb5e240dd88e525946cc6..d5cedd8c35636c67e06aa0d11c840b8efaacade8 100644 (file)
@@ -113,6 +113,13 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level)
 
        metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
        metaopaque->btpo_flags = BTP_META;
+
+       /*
+        * Set pd_lower just past the end of the metadata.  This is not
+        * essential but it makes the page look compressible to xlog.c.
+        */
+       ((PageHeader) page)->pd_lower =
+               ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
 }
 
 /*
index ba4da69fa08dbbde08a6823344ce33d39e47a06d..351450fae906d0605bc55fba2b832c44143b487f 100644 (file)
@@ -135,6 +135,13 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
        pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
        pageop->btpo_flags = BTP_META;
 
+       /*
+        * Set pd_lower just past the end of the metadata.  This is not
+        * essential but it makes the page look compressible to xlog.c.
+        */
+       ((PageHeader) metapg)->pd_lower =
+               ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg;
+
        PageSetLSN(metapg, lsn);
        PageSetTLI(metapg, ThisTimeLineID);
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
index 6348d46c0b217de55e540090bdea6a90db7447c5..312584b47557c63e3f39c550e3305bd05f4121f6 100644 (file)
@@ -434,6 +434,7 @@ static void exitArchiveRecovery(TimeLineID endTLI,
                                        uint32 endLogId, uint32 endLogSeg);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
 
+static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer);
 static bool AdvanceXLInsertBuffer(void);
 static void XLogWrite(XLogwrtRqst WriteRqst);
 static int XLogFileInit(uint32 log, uint32 seg,
@@ -499,8 +500,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        bool            dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
        BkpBlock        dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
        XLogRecPtr      dtbuf_lsn[XLR_MAX_BKP_BLOCKS];
-       XLogRecData dtbuf_rdt[2 * XLR_MAX_BKP_BLOCKS];
-       crc64           rdata_crc;
+       XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
+       XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
+       XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
+       pg_crc32        rdata_crc;
        uint32          len,
                                write_len;
        unsigned        i;
@@ -531,8 +534,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        /*
         * Here we scan the rdata list, determine which buffers must be backed
         * up, and compute the CRC values for the data.  Note that the record
-        * header isn't added into the CRC yet since we don't know the final
-        * length or info bits quite yet.
+        * header isn't added into the CRC initially since we don't know the
+        * final length or info bits quite yet.  Thus, the CRC will represent
+        * the CRC of the whole record in the order "rdata, then backup blocks,
+        * then record header".
         *
         * We may have to loop back to here if a race condition is detected
         * below. We could prevent the race by doing all this work while
@@ -553,7 +558,7 @@ begin:;
                dtbuf_bkp[i] = false;
        }
 
-       INIT_CRC64(rdata_crc);
+       INIT_CRC32(rdata_crc);
        len = 0;
        for (rdt = rdata;;)
        {
@@ -561,7 +566,7 @@ begin:;
                {
                        /* Simple data, just include it */
                        len += rdt->len;
-                       COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                       COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                }
                else
                {
@@ -576,7 +581,7 @@ begin:;
                                        else if (rdt->data)
                                        {
                                                len += rdt->len;
-                                               COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                                               COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                                        }
                                        break;
                                }
@@ -591,26 +596,14 @@ begin:;
                                        dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer));
                                        if (XLByteLE(dtbuf_lsn[i], RedoRecPtr))
                                        {
-                                               crc64           dtcrc;
-
                                                dtbuf_bkp[i] = true;
+                                               SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer);
                                                rdt->data = NULL;
-                                               INIT_CRC64(dtcrc);
-                                               COMP_CRC64(dtcrc,
-                                                                  BufferGetBlock(dtbuf[i]),
-                                                                  BLCKSZ);
-                                               dtbuf_xlg[i].node = BufferGetFileNode(dtbuf[i]);
-                                               dtbuf_xlg[i].block = BufferGetBlockNumber(dtbuf[i]);
-                                               COMP_CRC64(dtcrc,
-                                                               (char *) &(dtbuf_xlg[i]) + sizeof(crc64),
-                                                                  sizeof(BkpBlock) - sizeof(crc64));
-                                               FIN_CRC64(dtcrc);
-                                               dtbuf_xlg[i].crc = dtcrc;
                                        }
                                        else if (rdt->data)
                                        {
                                                len += rdt->len;
-                                               COMP_CRC64(rdata_crc, rdt->data, rdt->len);
+                                               COMP_CRC32(rdata_crc, rdt->data, rdt->len);
                                        }
                                        break;
                                }
@@ -625,6 +618,39 @@ begin:;
                rdt = rdt->next;
        }
 
+       /*
+        * Now add the backup block headers and data into the CRC
+        */
+       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       {
+               if (dtbuf_bkp[i])
+               {
+                       BkpBlock   *bkpb = &(dtbuf_xlg[i]);
+                       char       *page;
+
+                       COMP_CRC32(rdata_crc,
+                                          (char *) bkpb,
+                                          sizeof(BkpBlock));
+                       page = (char *) BufferGetBlock(dtbuf[i]);
+                       if (bkpb->hole_length == 0)
+                       {
+                               COMP_CRC32(rdata_crc,
+                                                  page,
+                                                  BLCKSZ);
+                       }
+                       else
+                       {
+                               /* must skip the hole */
+                               COMP_CRC32(rdata_crc,
+                                                  page,
+                                                  bkpb->hole_offset);
+                               COMP_CRC32(rdata_crc,
+                                                  page + (bkpb->hole_offset + bkpb->hole_length),
+                                                  BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
+                       }
+               }
+       }
+
        /*
         * NOTE: the test for len == 0 here is somewhat fishy, since in theory
         * all of the rmgr data might have been suppressed in favor of backup
@@ -713,23 +739,49 @@ begin:;
        write_len = len;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
        {
+               BkpBlock   *bkpb;
+               char       *page;
+
                if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i]))
                        continue;
 
                info |= XLR_SET_BKP_BLOCK(i);
 
-               rdt->next = &(dtbuf_rdt[2 * i]);
+               bkpb = &(dtbuf_xlg[i]);
+               page = (char *) BufferGetBlock(dtbuf[i]);
+
+               rdt->next = &(dtbuf_rdt1[i]);
+               rdt = rdt->next;
 
-               dtbuf_rdt[2 * i].data = (char *) &(dtbuf_xlg[i]);
-               dtbuf_rdt[2 * i].len = sizeof(BkpBlock);
+               rdt->data = (char *) bkpb;
+               rdt->len = sizeof(BkpBlock);
                write_len += sizeof(BkpBlock);
 
-               rdt = dtbuf_rdt[2 * i].next = &(dtbuf_rdt[2 * i + 1]);
+               rdt->next = &(dtbuf_rdt2[i]);
+               rdt = rdt->next;
 
-               dtbuf_rdt[2 * i + 1].data = (char *) BufferGetBlock(dtbuf[i]);
-               dtbuf_rdt[2 * i + 1].len = BLCKSZ;
-               write_len += BLCKSZ;
-               dtbuf_rdt[2 * i + 1].next = NULL;
+               if (bkpb->hole_length == 0)
+               {
+                       rdt->data = page;
+                       rdt->len = BLCKSZ;
+                       write_len += BLCKSZ;
+                       rdt->next = NULL;
+               }
+               else
+               {
+                       /* must skip the hole */
+                       rdt->data = page;
+                       rdt->len = bkpb->hole_offset;
+                       write_len += bkpb->hole_offset;
+
+                       rdt->next = &(dtbuf_rdt3[i]);
+                       rdt = rdt->next;
+
+                       rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
+                       rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
+                       write_len += rdt->len;
+                       rdt->next = NULL;
+               }
        }
 
        /*
@@ -752,14 +804,15 @@ begin:;
 
        record->xl_prev = Insert->PrevRecord;
        record->xl_xid = GetCurrentTransactionIdIfAny();
+       record->xl_tot_len = SizeOfXLogRecord + write_len;
        record->xl_len = len;           /* doesn't include backup blocks */
        record->xl_info = info;
        record->xl_rmid = rmid;
 
-       /* Now we can finish computing the main CRC */
-       COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(rdata_crc);
+       /* Now we can finish computing the record's CRC */
+       COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(rdata_crc);
        record->xl_crc = rdata_crc;
 
        /* Compute record's XLOG location */
@@ -884,6 +937,46 @@ begin:;
        return (RecPtr);
 }
 
+/*
+ * Fill *bkpb for the page held in buffer: record its file node and block
+ * number, then apply "hole compression" by describing the run of zero bytes
+ * that starts at pd_lower.  hole_offset and hole_length are both set to 0
+ * when pd_lower is out of range or the byte at pd_lower is nonzero.
+ */
+static void
+SetBkpBlock(BkpBlock *bkpb, Buffer buffer)
+{
+       PageHeader      page;
+       uint16          offset;
+       uint16          length;
+
+       /* Save page identity info */
+       bkpb->node = BufferGetFileNode(buffer);
+       bkpb->block = BufferGetBlockNumber(buffer);
+
+       /* Look for a "hole": consecutive zero bytes beginning at pd_lower */
+       page = (PageHeader) BufferGetBlock(buffer);
+       offset = page->pd_lower;
+       /* Trust pd_lower only if it is past the page header and inside the block */
+       if (offset >= SizeOfPageHeaderData && offset < BLCKSZ)
+       {
+               char   *spd = (char *) page + offset;   /* start of candidate hole */
+               char   *epd = (char *) page + BLCKSZ;   /* one past the end of the page */
+               char   *pd = spd;       /* scan cursor */
+               /* Advance to the first nonzero byte, or to the end of the page */
+               while (pd < epd && *pd == '\0')
+                       pd++;
+
+               length = pd - spd;
+               if (length == 0)
+                       offset = 0;     /* no zero run found: report offset 0 as well */
+       }
+       else
+               offset = length = 0;    /* pd_lower unusable: report no hole */
+       bkpb->hole_offset = offset;
+       bkpb->hole_length = length;
+}
+
 /*
  * XLogArchiveNotify
  *
@@ -2276,7 +2369,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
                        continue;
 
-               memcpy((char *) &bkpb, blk, sizeof(BkpBlock));
+               memcpy(&bkpb, blk, sizeof(BkpBlock));
                blk += sizeof(BkpBlock);
 
                reln = XLogOpenRelation(true, record->xl_rmid, bkpb.node);
@@ -2287,7 +2380,21 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                        if (BufferIsValid(buffer))
                        {
                                page = (Page) BufferGetPage(buffer);
-                               memcpy((char *) page, blk, BLCKSZ);
+
+                               if (bkpb.hole_length == 0)
+                               {
+                                       memcpy((char *) page, blk, BLCKSZ);
+                               }
+                               else
+                               {
+                                       /* must zero-fill the hole */
+                                       MemSet((char *) page, 0, BLCKSZ);
+                                       memcpy((char *) page, blk, bkpb.hole_offset);
+                                       memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
+                                                  blk + bkpb.hole_offset,
+                                                  BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
+                               }
+
                                PageSetLSN(page, lsn);
                                PageSetTLI(page, ThisTimeLineID);
                                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -2295,7 +2402,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
                        }
                }
 
-               blk += BLCKSZ;
+               blk += BLCKSZ - bkpb.hole_length;
        }
 }
 
@@ -2309,53 +2416,61 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
 static bool
 RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
 {
-       crc64           crc;
-       crc64           cbuf;
+       pg_crc32        crc;
        int                     i;
        uint32          len = record->xl_len;
+       BkpBlock        bkpb;
        char       *blk;
 
-       /* Check CRC of rmgr data and record header */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, XLogRecGetData(record), len);
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       /* First the rmgr data */
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, XLogRecGetData(record), len);
 
-       if (!EQ_CRC64(record->xl_crc, crc))
-       {
-               ereport(emode,
-                               (errmsg("incorrect resource manager data checksum in record at %X/%X",
-                                               recptr.xlogid, recptr.xrecoff)));
-               return (false);
-       }
-
-       /* Check CRCs of backup blocks, if any */
+       /* Add in the backup blocks, if any */
        blk = (char *) XLogRecGetData(record) + len;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
        {
+               uint32  blen;
+
                if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
                        continue;
 
-               INIT_CRC64(crc);
-               COMP_CRC64(crc, blk + sizeof(BkpBlock), BLCKSZ);
-               COMP_CRC64(crc, blk + sizeof(crc64),
-                                  sizeof(BkpBlock) - sizeof(crc64));
-               FIN_CRC64(crc);
-               memcpy((char *) &cbuf, blk, sizeof(crc64));             /* don't assume
-                                                                                                                * alignment */
-
-               if (!EQ_CRC64(cbuf, crc))
+               memcpy(&bkpb, blk, sizeof(BkpBlock));
+               if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
                {
                        ereport(emode,
-                                       (errmsg("incorrect checksum of backup block %d in record at %X/%X",
-                                                       i + 1, recptr.xlogid, recptr.xrecoff)));
-                       return (false);
+                                       (errmsg("incorrect hole size in record at %X/%X",
+                                                       recptr.xlogid, recptr.xrecoff)));
+                       return false;
                }
-               blk += sizeof(BkpBlock) + BLCKSZ;
+               blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
+               COMP_CRC32(crc, blk, blen);
+               blk += blen;
+       }
+
+       /* Check that xl_tot_len agrees with our calculation */
+       if (blk != (char *) record + record->xl_tot_len)
+       {
+               ereport(emode,
+                               (errmsg("incorrect total length in record at %X/%X",
+                                               recptr.xlogid, recptr.xrecoff)));
+               return false;
        }
 
-       return (true);
+       /* Finally include the record header */
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
+
+       if (!EQ_CRC32(record->xl_crc, crc))
+       {
+               ereport(emode,
+                               (errmsg("incorrect resource manager data checksum in record at %X/%X",
+                                               recptr.xlogid, recptr.xrecoff)));
+               return false;
+       }
+
+       return true;
 }
 
 /*
@@ -2382,7 +2497,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
        uint32          targetPageOff;
        uint32          targetRecOff;
        uint32          pageHeaderSize;
-       unsigned        i;
 
        if (readBuf == NULL)
        {
@@ -2518,6 +2632,15 @@ got_record:;
                                                RecPtr->xlogid, RecPtr->xrecoff)));
                goto next_record_is_invalid;
        }
+       if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
+               record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
+               XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+       {
+               ereport(emode,
+                               (errmsg("invalid record length at %X/%X",
+                                               RecPtr->xlogid, RecPtr->xrecoff)));
+               goto next_record_is_invalid;
+       }
        if (record->xl_rmid > RM_MAX_ID)
        {
                ereport(emode,
@@ -2557,18 +2680,6 @@ got_record:;
                }
        }
 
-       /*
-        * Compute total length of record including any appended backup
-        * blocks.
-        */
-       total_len = SizeOfXLogRecord + record->xl_len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               if (!(record->xl_info & XLR_SET_BKP_BLOCK(i)))
-                       continue;
-               total_len += sizeof(BkpBlock) + BLCKSZ;
-       }
-
        /*
         * Allocate or enlarge readRecordBuf as needed.  To avoid useless
         * small increases, round its size to a multiple of BLCKSZ, and make
@@ -2576,6 +2687,7 @@ got_record:;
         * "normal" records, but very large commit or abort records might need
         * more space.)
         */
+       total_len = record->xl_tot_len;
        if (total_len > readRecordBufSize)
        {
                uint32          newSize = total_len;
@@ -2666,15 +2778,15 @@ got_record:;
                        goto next_record_is_invalid;
                pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
                if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
-                       SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len))
+                       MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len))
                {
                        nextRecord = (XLogRecord *) ((char *) contrecord +
-                               SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len));
+                               MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len));
                }
                EndRecPtr.xlogid = readId;
                EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff +
-                       pageHeaderSize + SizeOfXLogContRecord +
-                       MAXALIGN(contrecord->xl_rem_len);
+                       pageHeaderSize +
+                       MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len);
                ReadRecPtr = *RecPtr;
                return record;
        }
@@ -3194,11 +3306,11 @@ WriteControlFile(void)
        StrNCpy(ControlFile->lc_ctype, localeptr, LOCALE_NAME_BUFLEN);
 
        /* Contents are protected with a CRC */
-       INIT_CRC64(ControlFile->crc);
-       COMP_CRC64(ControlFile->crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile->crc);
+       INIT_CRC32(ControlFile->crc);
+       COMP_CRC32(ControlFile->crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile->crc);
 
        /*
         * We write out BLCKSZ bytes into pg_control, zero-padding the excess
@@ -3247,7 +3359,7 @@ WriteControlFile(void)
 static void
 ReadControlFile(void)
 {
-       crc64           crc;
+       pg_crc32        crc;
        int                     fd;
 
        /*
@@ -3281,13 +3393,13 @@ ReadControlFile(void)
                                        ControlFile->pg_control_version, PG_CONTROL_VERSION),
                                 errhint("It looks like you need to initdb.")));
        /* Now check the CRC. */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(crc);
 
-       if (!EQ_CRC64(crc, ControlFile->crc))
+       if (!EQ_CRC32(crc, ControlFile->crc))
                ereport(FATAL,
                                (errmsg("incorrect checksum in control file")));
 
@@ -3396,11 +3508,11 @@ UpdateControlFile(void)
 {
        int                     fd;
 
-       INIT_CRC64(ControlFile->crc);
-       COMP_CRC64(ControlFile->crc,
-                          (char *) ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile->crc);
+       INIT_CRC32(ControlFile->crc);
+       COMP_CRC32(ControlFile->crc,
+                          (char *) ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile->crc);
 
        fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
        if (fd < 0)
@@ -3525,7 +3637,7 @@ BootStrapXLOG(void)
        bool            use_existent;
        uint64          sysidentifier;
        struct timeval tv;
-       crc64           crc;
+       pg_crc32        crc;
 
        /*
         * Select a hopefully-unique system identifier code for this
@@ -3582,16 +3694,17 @@ BootStrapXLOG(void)
        record->xl_prev.xlogid = 0;
        record->xl_prev.xrecoff = 0;
        record->xl_xid = InvalidTransactionId;
+       record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
        record->xl_len = sizeof(checkPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
        memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
 
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, &checkPoint, sizeof(checkPoint));
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
        record->xl_crc = crc;
 
        /* Create first XLOG segment file */
@@ -4694,7 +4807,8 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt)
                }
                return NULL;
        }
-       if (record->xl_len != sizeof(CheckPoint))
+       if (record->xl_len != sizeof(CheckPoint) ||
+               record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
        {
                switch (whichChkpt)
                {
index beaf417881d9369820b4002b23adc8f1885b741c..4b4f88bda74be50a58ffa17d06f27aa3d2e1d4cd 100644 (file)
@@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp = PageGetItemId(page, i + 1);
                        lp->lp_len = 0;         /* indicate unused & deallocated */
                }
-               ((PageHeader) page)->pd_upper = pd_special;
+               ((PageHeader) page)->pd_upper = pd_upper = pd_special;
        }
        else
        {                                                       /* nused != 0 */
@@ -411,11 +411,17 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp->lp_off = upper;
                }
 
-               ((PageHeader) page)->pd_upper = upper;
+               ((PageHeader) page)->pd_upper = pd_upper = upper;
 
                pfree(itemidbase);
        }
 
+       /*
+        * Zero out the now-free space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
+
        return (nline - nused);
 }
 
@@ -525,6 +531,13 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
        phdr->pd_upper += size;
        phdr->pd_lower -= sizeof(ItemIdData);
 
+       /*
+        * Zero out the just-freed space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData));
+       MemSet(addr, 0, size);
+
        /*
         * Finally, we need to adjust the linp entries that remain.
         *
@@ -672,8 +685,14 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
                lp->lp_off = upper;
        }
 
-       phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
-       phdr->pd_upper = upper;
+       phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
+       phdr->pd_upper = pd_upper = upper;
+
+       /*
+        * Zero out the now-free space.  This is not essential, but it allows
+        * xlog.c to compress WAL data better.
+        */
+       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
 
        pfree(itemidbase);
 }
index c83e21f9e0824a331a51adfa477f2d19899a95a3..793833e0c50461028d54adf0447cbe5a442017a3 100644 (file)
@@ -1,7 +1,18 @@
 /*-------------------------------------------------------------------------
  *
  * pg_crc.c
- *       PostgreSQL 64-bit CRC support
+ *       PostgreSQL CRC support
+ *
+ * See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.
+ *
+ * We use a normal (not "reflected", in Williams' terms) CRC, using initial
+ * all-ones register contents and a final bit inversion.
+ *
+ * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the
+ * code for possible future use.
+ *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
 #include "utils/pg_crc.h"
 
 
+/*
+ * This table is based on the polynomial
+ *     x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+ * (This is the same polynomial used in Ethernet checksums, for instance.)
+ */
+const uint32 pg_crc32_table[256] = {
+       0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
+       0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+       0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+       0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+       0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
+       0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+       0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
+       0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+       0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+       0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+       0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
+       0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+       0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
+       0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+       0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+       0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+       0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
+       0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+       0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
+       0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+       0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+       0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+       0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
+       0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+       0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
+       0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+       0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+       0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+       0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
+       0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+       0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
+       0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+       0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+       0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+       0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
+       0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+       0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
+       0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+       0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+       0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+       0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
+       0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+       0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
+       0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+       0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+       0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+       0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
+       0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+       0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
+       0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+       0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+       0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+       0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
+       0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+       0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
+       0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+       0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+       0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+       0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
+       0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+       0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
+       0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+       0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+       0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+};
+
+
+#ifdef PROVIDE_64BIT_CRC
+
+/*
+ * This table is based on the polynomial
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * which is borrowed from the DLT1 spec
+ * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM)
+ */
+
 #ifdef INT64_IS_BUSTED
 
-const uint32 crc_table0[256] = {
+const uint32 pg_crc64_table0[256] = {
        0x00000000, 0xA9EA3693,
        0x53D46D26, 0xFA3E5BB5,
        0x0E42ECDF, 0xA7A8DA4C,
@@ -150,7 +248,7 @@ const uint32 crc_table0[256] = {
        0x676F8394, 0xCE85B507
 };
 
-const uint32 crc_table1[256] = {
+const uint32 pg_crc64_table1[256] = {
        0x00000000, 0x42F0E1EB,
        0x85E1C3D7, 0xC711223C,
        0x49336645, 0x0BC387AE,
@@ -283,7 +381,7 @@ const uint32 crc_table1[256] = {
 
 #else                                                  /* int64 works */
 
-const uint64 crc_table[256] = {
+const uint64 pg_crc64_table[256] = {
        UINT64CONST(0x0000000000000000), UINT64CONST(0x42F0E1EBA9EA3693),
        UINT64CONST(0x85E1C3D753D46D26), UINT64CONST(0xC711223CFA3E5BB5),
        UINT64CONST(0x493366450E42ECDF), UINT64CONST(0x0BC387AEA7A8DA4C),
@@ -415,3 +513,5 @@ const uint64 crc_table[256] = {
 };
 
 #endif   /* INT64_IS_BUSTED */
+
+#endif /* PROVIDE_64BIT_CRC */
index 0131a4271ab7935e5bfaa85b21d4c77c0eab813b..a73de234a313cfd1e4f26476dd41ec4bb19b7510 100644 (file)
@@ -66,7 +66,7 @@ main(int argc, char *argv[])
        int                     fd;
        char            ControlFilePath[MAXPGPATH];
        char       *DataDir;
-       crc64           crc;
+       pg_crc32        crc;
        char            pgctime_str[128];
        char            ckpttime_str[128];
        char            sysident_str[32];
@@ -120,13 +120,13 @@ main(int argc, char *argv[])
        close(fd);
 
        /* Check the CRC. */
-       INIT_CRC64(crc);
-       COMP_CRC64(crc,
-                          (char *) &ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc,
+                          (char *) &ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(crc);
 
-       if (!EQ_CRC64(crc, ControlFile.crc))
+       if (!EQ_CRC32(crc, ControlFile.crc))
                printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n"
                                 "Either the file is corrupt, or it has a different layout than this program\n"
                         "is expecting.  The results below are untrustworthy.\n\n"));
index 29cdf8a90afb1b5538507a0fe13a17062fdcae78..c7c7e709279e6a14ca1d1cf2d92e4aa55634d1e5 100644 (file)
@@ -327,7 +327,7 @@ ReadControlFile(void)
        int                     fd;
        int                     len;
        char       *buffer;
-       crc64           crc;
+       pg_crc32        crc;
 
        if ((fd = open(ControlFilePath, O_RDONLY)) < 0)
        {
@@ -362,13 +362,13 @@ ReadControlFile(void)
                ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
        {
                /* Check the CRC. */
-               INIT_CRC64(crc);
-               COMP_CRC64(crc,
-                                  buffer + sizeof(crc64),
-                                  sizeof(ControlFileData) - sizeof(crc64));
-               FIN_CRC64(crc);
+               INIT_CRC32(crc);
+               COMP_CRC32(crc,
+                                  buffer,
+                                  offsetof(ControlFileData, crc));
+               FIN_CRC32(crc);
 
-               if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc))
+               if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc))
                {
                        /* Valid data... */
                        memcpy(&ControlFile, buffer, sizeof(ControlFile));
@@ -553,11 +553,11 @@ RewriteControlFile(void)
        ControlFile.prevCheckPoint.xrecoff = 0;
 
        /* Contents are protected with a CRC */
-       INIT_CRC64(ControlFile.crc);
-       COMP_CRC64(ControlFile.crc,
-                          (char *) &ControlFile + sizeof(crc64),
-                          sizeof(ControlFileData) - sizeof(crc64));
-       FIN_CRC64(ControlFile.crc);
+       INIT_CRC32(ControlFile.crc);
+       COMP_CRC32(ControlFile.crc,
+                          (char *) &ControlFile,
+                          offsetof(ControlFileData, crc));
+       FIN_CRC32(ControlFile.crc);
 
        /*
         * We write out BLCKSZ bytes into pg_control, zero-padding the excess
@@ -673,7 +673,7 @@ WriteEmptyXLOG(void)
        XLogPageHeader page;
        XLogLongPageHeader longpage;
        XLogRecord *record;
-       crc64           crc;
+       pg_crc32        crc;
        char            path[MAXPGPATH];
        int                     fd;
        int                     nbytes;
@@ -700,17 +700,18 @@ WriteEmptyXLOG(void)
        record->xl_prev.xlogid = 0;
        record->xl_prev.xrecoff = 0;
        record->xl_xid = InvalidTransactionId;
+       record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
        record->xl_len = sizeof(CheckPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
        memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
                   sizeof(CheckPoint));
 
-       INIT_CRC64(crc);
-       COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
-       COMP_CRC64(crc, (char *) record + sizeof(crc64),
-                          SizeOfXLogRecord - sizeof(crc64));
-       FIN_CRC64(crc);
+       INIT_CRC32(crc);
+       COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+       COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
+                          SizeOfXLogRecord - sizeof(pg_crc32));
+       FIN_CRC32(crc);
        record->xl_crc = crc;
 
        /* Write the first page */
index 71a22180a3e44b5c6d555d178f73d577be81de9f..ef5eb5b82b03a4fd3bdba8ab5dc099ae1f4b2e56 100644 (file)
 
 
 /*
- * Header for each record in XLOG
+ * The overall layout of an XLOG record is:
+ *             Fixed-size header (XLogRecord struct)
+ *             rmgr-specific data
+ *             BkpBlock
+ *             backup block data
+ *             BkpBlock
+ *             backup block data
+ *             ...
  *
- * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks appended to the record (which are signaled
- * by xl_info flag bits).  The total space needed for an XLOG record is
- * really:
- *
- * SizeOfXLogRecord + xl_len + n_backup_blocks * (sizeof(BkpBlock) + BLCKSZ)
+ * where there can be zero to three backup blocks (as signaled by xl_info flag
+ * bits).  XLogRecord structs always start on MAXALIGN boundaries in the WAL
+ * files, and we round up SizeOfXLogRecord so that the rmgr data is also
+ * guaranteed to begin on a MAXALIGN boundary.  However, no padding is added
+ * to align BkpBlock structs or backup block data.
  *
- * rounded up to a MAXALIGN boundary (so that all xlog records start on
- * MAXALIGN boundaries).
+ * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
+ * and also not any backup blocks.  xl_tot_len counts everything.  Neither
+ * length field is rounded up to an alignment boundary.
  */
 typedef struct XLogRecord
 {
-       crc64           xl_crc;                 /* CRC for this record */
+       pg_crc32        xl_crc;                 /* CRC for this record */
        XLogRecPtr      xl_prev;                /* ptr to previous record in log */
        TransactionId xl_xid;           /* xact id */
+       uint32          xl_tot_len;             /* total len of entire record */
        uint32          xl_len;                 /* total len of rmgr data */
        uint8           xl_info;                /* flag bits, see below */
        RmgrId          xl_rmid;                /* resource manager for this record */
index 1f7ad8238d6cfb5fdd5e21c14332c0e95201d184..32e11bbe898f4a6cd8ee65305a358c24f11bbe5e 100644 (file)
 /*
  * Header info for a backup block appended to an XLOG record.
  *
- * Note that the backup block has its own CRC, and is not covered by
- * the CRC of the XLOG record proper.  Also note that we don't attempt
- * to align either the BkpBlock struct or the block's data.
+ * As a trivial form of data compression, the XLOG code is aware that
+ * PG data pages usually contain an unused "hole" in the middle, which
+ * contains only zero bytes.  If hole_length > 0 then we have removed
+ * such a "hole" from the stored data (and it's not counted in the
+ * XLOG record's CRC, either).  Hence, the amount of block data actually
+ * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
+ *
+ * Note that we don't attempt to align either the BkpBlock struct or the
+ * block's data.  So, the struct must be copied to aligned local storage
+ * before use.
  */
 typedef struct BkpBlock
 {
-       crc64           crc;
-       RelFileNode node;
-       BlockNumber block;
+       RelFileNode node;                       /* relation containing block */
+       BlockNumber block;                      /* block number */
+       uint16          hole_offset;    /* number of bytes before "hole" */
+       uint16          hole_length;    /* number of bytes in "hole" */
+
+       /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
 } BkpBlock;
 
 /*
@@ -42,8 +52,9 @@ typedef struct BkpBlock
  * XLogRecord header will never be split across pages; if there's less than
  * SizeOfXLogRecord space left at the end of a page, we just waste it.)
  *
- * Note that xl_rem_len includes backup-block data, unlike xl_len in the
- * initial header.
+ * Note that xl_rem_len includes backup-block data; that is, it tracks
+ * xl_tot_len not xl_len in the initial header.  Also note that the
+ * continuation data isn't necessarily aligned.
  */
 typedef struct XLogContRecord
 {
@@ -53,12 +64,12 @@ typedef struct XLogContRecord
 
 } XLogContRecord;
 
-#define SizeOfXLogContRecord   MAXALIGN(sizeof(XLogContRecord))
+#define SizeOfXLogContRecord   sizeof(XLogContRecord)
 
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD05C /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD05D /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
index 1d4d9f3d8bf1dd24dd1c2f569e366ddd23066c9c..2220b7a1136a5250c6fb35a1160e431c7b9372f3 100644 (file)
@@ -22,7 +22,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION     81
+#define PG_CONTROL_VERSION     810
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -73,12 +73,17 @@ typedef enum DBState
 
 typedef struct ControlFileData
 {
-       crc64           crc;                    /* CRC for remainder of struct */
+       /*
+        * Unique system identifier --- to ensure we match up xlog files with
+        * the installation that produced them.
+        */
+       uint64          system_identifier;
 
        /*
-        * Version identifier information.      Keep these fields at the front,
+        * Version identifier information.      Keep these fields at the same offset,
         * especially pg_control_version; they won't be real useful if they
-        * move around.
+        * move around.  (For historical reasons they must be 8 bytes into
+        * the file rather than immediately at the front.)
         *
         * pg_control_version identifies the format of pg_control itself.
         * catalog_version_no identifies the format of the system catalogs.
@@ -90,12 +95,6 @@ typedef struct ControlFileData
        uint32          pg_control_version;             /* PG_CONTROL_VERSION */
        uint32          catalog_version_no;             /* see catversion.h */
 
-       /*
-        * Unique system identifier --- to ensure we match up xlog files with
-        * the installation that produced them.
-        */
-       uint64          system_identifier;
-
        /*
         * System status data
         */
@@ -127,6 +126,9 @@ typedef struct ControlFileData
        uint32          localeBuflen;
        char            lc_collate[LOCALE_NAME_BUFLEN];
        char            lc_ctype[LOCALE_NAME_BUFLEN];
+
+       /* CRC of all above ... MUST BE LAST! */
+       pg_crc32        crc;
 } ControlFileData;
 
 #endif   /* PG_CONTROL_H */
index 342028cfeb953c9425c1bea850af0edf563aaabe..57d1ecb265c6aa1ea1a42541b0c88a0f101eadca 100644 (file)
@@ -1,7 +1,18 @@
 /*
  * pg_crc.h
  *
- * PostgreSQL 64-bit CRC support
+ * PostgreSQL CRC support
+ *
+ * See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.
+ *
+ * We use a normal (not "reflected", in Williams' terms) CRC, using initial
+ * all-ones register contents and a final bit inversion.
+ *
+ * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the
+ * code for possible future use.
+ *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
 #ifndef PG_CRC_H
 #define PG_CRC_H
 
+
+typedef uint32 pg_crc32;
+
+/* Initialize a CRC accumulator */
+#define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF)
+
+/* Finish a CRC calculation */
+#define FIN_CRC32(crc) ((crc) ^= 0xFFFFFFFF)
+
+/*
+ * Accumulate some (more) bytes into a CRC
+ *
+ * Folds "len" bytes starting at "data" into the accumulator "crc", one
+ * byte at a time.  The callers in this patch use the sequence INIT_CRC32,
+ * one or more COMP_CRC32 calls, then FIN_CRC32.
+ *
+ * "crc" must be a modifiable lvalue (normally a pg_crc32) and is expanded
+ * several times inside the loop, so it must not have side effects.
+ * "data" and "len" are each evaluated exactly once; "len" is held in a
+ * uint32.
+ *
+ * Per input byte: the table index is the top 8 bits of the accumulator
+ * XORed with the data byte, and the new accumulator is that table entry
+ * XORed with the old accumulator shifted left by 8 bits.
+ *
+ * NOTE(review): this is the high-byte-indexed, left-shifting update step.
+ * The final entry of pg_crc32_table (0x2D02EF8D) appears to match the
+ * widely published CRC-32 table that is normally paired with the
+ * low-byte-indexed, right-shifting update step instead -- confirm that
+ * the table and this macro were generated for the same bit order, since
+ * the resulting values are written into WAL records and pg_control.
+ */
+#define COMP_CRC32(crc, data, len)     \
+do { \
+       unsigned char *__data = (unsigned char *) (data); \
+       uint32          __len = (len); \
+\
+       while (__len-- > 0) \
+       { \
+               int             __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF; \
+               (crc) = pg_crc32_table[__tab_index] ^ ((crc) << 8); \
+       } \
+} while (0)
+
+/* Check for equality of two CRCs */
+#define EQ_CRC32(c1,c2)  ((c1) == (c2))
+
+/* Constant table for CRC calculation */
+extern const uint32 pg_crc32_table[];
+
+
+#ifdef PROVIDE_64BIT_CRC
+
 /*
  * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation.  (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion.  The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
+ * usual sort of implementation.  If we have no working 64-bit type, then
+ * fake it with two 32-bit registers.  (Note: experience has shown that the
+ * two-32-bit-registers code is as fast as, or even much faster than, the
+ * 64-bit code on all but true 64-bit machines.  INT64_IS_BUSTED is therefore
+ * probably the wrong control symbol to use to select the implementation.)
  */
 
 #ifdef INT64_IS_BUSTED
  * all machines, we could do a configure test to decide how to order the
  * two fields, but it seems not worth the trouble.
  */
-typedef struct crc64
+typedef struct pg_crc64
 {
        uint32          crc0;
        uint32          crc1;
-} crc64;
+} pg_crc64;
 
 /* Initialize a CRC accumulator */
 #define INIT_CRC64(crc) ((crc).crc0 = 0xffffffff, (crc).crc1 = 0xffffffff)
@@ -62,8 +95,8 @@ do { \
        while (__len-- > 0) \
        { \
                int             __tab_index = ((int) (__crc1 >> 24) ^ *__data++) & 0xFF; \
-               __crc1 = crc_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \
-               __crc0 = crc_table0[__tab_index] ^ (__crc0 << 8); \
+               __crc1 = pg_crc64_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \
+               __crc0 = pg_crc64_table0[__tab_index] ^ (__crc0 << 8); \
        } \
        (crc).crc0 = __crc0; \
        (crc).crc1 = __crc1; \
@@ -73,15 +106,15 @@ do { \
 #define EQ_CRC64(c1,c2)  ((c1).crc0 == (c2).crc0 && (c1).crc1 == (c2).crc1)
 
 /* Constant table for CRC calculation */
-extern const uint32 crc_table0[];
-extern const uint32 crc_table1[];
+extern const uint32 pg_crc64_table0[];
+extern const uint32 pg_crc64_table1[];
 
 #else                                                  /* int64 works */
 
-typedef struct crc64
+typedef struct pg_crc64
 {
        uint64          crc0;
-} crc64;
+} pg_crc64;
 
 /* Initialize a CRC accumulator */
 #define INIT_CRC64(crc) ((crc).crc0 = UINT64CONST(0xffffffffffffffff))
@@ -99,7 +132,7 @@ do { \
        while (__len-- > 0) \
        { \
                int             __tab_index = ((int) (__crc0 >> 56) ^ *__data++) & 0xFF; \
-               __crc0 = crc_table[__tab_index] ^ (__crc0 << 8); \
+               __crc0 = pg_crc64_table[__tab_index] ^ (__crc0 << 8); \
        } \
        (crc).crc0 = __crc0; \
 } while (0)
@@ -108,7 +141,9 @@ do { \
 #define EQ_CRC64(c1,c2)  ((c1).crc0 == (c2).crc0)
 
 /* Constant table for CRC calculation */
-extern const uint64 crc_table[];
+extern const uint64 pg_crc64_table[];
 #endif   /* INT64_IS_BUSTED */
 
+#endif /* PROVIDE_64BIT_CRC */
+
 #endif   /* PG_CRC_H */