commit e20cb1e1713f2e37b3e98475a35c9b40842d20a3
Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date:   Thu Jun 7 16:09:23 2012 +0300

    Allow WAL record headers to be split across pages.
    
    Rearrange XLogRecord so that xl_tot_len is the first field, to make it
    easier to reassemble records.

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6935149..3f5e0b2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -650,7 +650,9 @@ static void CleanupBackupHistory(void);
 static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
 static void CheckRecoveryConsistency(void);
-static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
+static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);
+static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,
+					  int emode, bool randAccess);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
 static List *readTimeLineHistory(TimeLineID targetTLI);
 static bool existsTimeLineHistory(TimeLineID probeTLI);
@@ -692,7 +694,6 @@ XLogRecPtr
 XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 {
 	XLogCtlInsert *Insert = &XLogCtl->Insert;
-	XLogRecord *record;
 	XLogRecPtr	RecPtr;
 	XLogRecPtr	WriteRqst;
 	uint32		freespace;
@@ -706,6 +707,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 	XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
 	XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
 	XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
+	XLogRecData hdr_rdt;
 	pg_crc32	rdata_crc;
 	uint32		len,
 				write_len;
@@ -714,6 +716,15 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
 	bool		doPageWrites;
 	bool		isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
 	uint8		info_orig = info;
+	static XLogRecord *rechdr;
+
+	if (rechdr == NULL)
+	{
+		rechdr = malloc(SizeOfXLogRecord);
+		if (rechdr == NULL)
+			elog(ERROR, "out of memory");
+		MemSet(rechdr, 0, SizeOfXLogRecord);
+	}
 
 	/* cross-check on whether we should be here or not */
 	if (!XLogInsertAllowed())
@@ -900,6 +911,22 @@ begin:;
 	for (rdt = rdata; rdt != NULL; rdt = rdt->next)
 		COMP_CRC32(rdata_crc, rdt->data, rdt->len);
 
+	/*
+	 * Construct record header (prev-link and CRC are filled in later), and
+	 * make that the first chunk in the chain.
+	 */
+	rechdr->xl_xid = GetCurrentTransactionIdIfAny();
+	rechdr->xl_tot_len = SizeOfXLogRecord + write_len;
+	rechdr->xl_len = len;		/* doesn't include backup blocks */
+	rechdr->xl_info = info;
+	rechdr->xl_rmid = rmid;
+
+	hdr_rdt.next = rdata;
+	hdr_rdt.data = (char *) rechdr;
+	hdr_rdt.len = SizeOfXLogRecord;
+
+	write_len += SizeOfXLogRecord;
+
 	START_CRIT_SECTION();
 
 	/* Now wait to get insert lock */
@@ -959,12 +986,12 @@ begin:;
 	}
 
 	/*
-	 * If there isn't enough space on the current XLOG page for a record
-	 * header, advance to the next page (leaving the unused space as zeroes).
+	 * If the current page is completely full, the record goes to the next
+	 * page, right after the page header.
 	 */
 	updrqst = false;
 	freespace = INSERT_FREESPACE(Insert);
-	if (freespace < SizeOfXLogRecord)
+	if (freespace == 0)
 	{
 		updrqst = AdvanceXLInsertBuffer(false);
 		freespace = INSERT_FREESPACE(Insert);
@@ -1006,21 +1033,13 @@ begin:;
 		return RecPtr;
 	}
 
-	/* Insert record header */
-
-	record = (XLogRecord *) Insert->currpos;
-	record->xl_prev = Insert->PrevRecord;
-	record->xl_xid = GetCurrentTransactionIdIfAny();
-	record->xl_tot_len = SizeOfXLogRecord + write_len;
-	record->xl_len = len;		/* doesn't include backup blocks */
-	record->xl_info = info;
-	record->xl_rmid = rmid;
+	/* Finish the record header */
+	rechdr->xl_prev = Insert->PrevRecord;
 
 	/* Now we can finish computing the record's CRC */
-	COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32),
-			   SizeOfXLogRecord - sizeof(pg_crc32));
+	COMP_CRC32(rdata_crc, (char *) rechdr, offsetof(XLogRecord, xl_crc));
 	FIN_CRC32(rdata_crc);
-	record->xl_crc = rdata_crc;
+	rechdr->xl_crc = rdata_crc;
 
 #ifdef WAL_DEBUG
 	if (XLOG_DEBUG)
@@ -1030,11 +1049,11 @@ begin:;
 		initStringInfo(&buf);
 		appendStringInfo(&buf, "INSERT @ %X/%X: ",
 						 RecPtr.xlogid, RecPtr.xrecoff);
-		xlog_outrec(&buf, record);
+		xlog_outrec(&buf, rechdr);
 		if (rdata->data != NULL)
 		{
 			appendStringInfo(&buf, " - ");
-			RmgrTable[record->xl_rmid].rm_desc(&buf, record->xl_info, rdata->data);
+			RmgrTable[rechdr->xl_rmid].rm_desc(&buf, rechdr->xl_info, rdata->data);
 		}
 		elog(LOG, "%s", buf.data);
 		pfree(buf.data);
@@ -1045,12 +1064,10 @@ begin:;
 	ProcLastRecPtr = RecPtr;
 	Insert->PrevRecord = RecPtr;
 
-	Insert->currpos += SizeOfXLogRecord;
-	freespace -= SizeOfXLogRecord;
-
 	/*
 	 * Append the data, including backup blocks if any
 	 */
+	rdata = &hdr_rdt;
 	while (write_len)
 	{
 		while (rdata->data == NULL)
@@ -1168,7 +1185,7 @@ begin:;
 		/* normal case, ie not xlog switch */
 
 		/* Need to update shared LogwrtRqst if some block was filled up */
-		if (freespace < SizeOfXLogRecord)
+		if (freespace == 0)
 		{
 			/* curridx is filled and available for writing out */
 			updrqst = true;
@@ -2087,7 +2104,7 @@ XLogFlush(XLogRecPtr record)
 				XLogCtlInsert *Insert = &XLogCtl->Insert;
 				uint32		freespace = INSERT_FREESPACE(Insert);
 
-				if (freespace < SizeOfXLogRecord)		/* buffer is full */
+				if (freespace == 0)		/* buffer is full */
 					WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
 				else
 				{
@@ -3694,8 +3711,7 @@ RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode)
 	}
 
 	/* Finally include the record header */
-	COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
-			   SizeOfXLogRecord - sizeof(pg_crc32));
+	COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
 	FIN_CRC32(crc);
 
 	if (!EQ_CRC32(record->xl_crc, crc))
@@ -3725,13 +3741,13 @@ static XLogRecord *
 ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
 {
 	XLogRecord *record;
-	char	   *buffer;
 	XLogRecPtr	tmpRecPtr = EndRecPtr;
 	bool		randAccess = false;
 	uint32		len,
 				total_len;
 	uint32		targetRecOff;
 	uint32		pageHeaderSize;
+	bool		gotheader;
 
 	if (readBuf == NULL)
 	{
@@ -3744,6 +3760,10 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
 		 */
 		readBuf = (char *) malloc(XLOG_BLCKSZ);
 		Assert(readBuf != NULL);
+
+		readRecordBuf = malloc(XLOG_BLCKSZ);
+		Assert(readRecordBuf != NULL);
+		readRecordBufSize = XLOG_BLCKSZ;
 	}
 
 	if (RecPtr == NULL)
@@ -3751,17 +3771,10 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
 		RecPtr = &tmpRecPtr;
 
 		/*
-		 * RecPtr is pointing to end+1 of the previous WAL record.  We must
-		 * advance it if necessary to where the next record starts.  First,
-		 * align to next page if no more records can fit on the current page.
-		 */
-		if (XLOG_BLCKSZ - (RecPtr->xrecoff % XLOG_BLCKSZ) < SizeOfXLogRecord)
-			NextLogPage(*RecPtr);
-
-		/*
-		 * If at page start, we must skip over the page header.  But we can't
-		 * do that until we've read in the page, since the header size is
-		 * variable.
+		 * RecPtr is pointing to end+1 of the previous WAL record.  If
+		 * we're at a page boundary, no more records can fit on the current
+		 * page. We must skip over the page header, but we can't do that
+		 * until we've read in the page, since the header size is variable.
 		 */
 	}
 	else
@@ -3782,7 +3795,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
 		 * to go backwards (but we can't reset that variable right here, since
 		 * we might not change files at all).
 		 */
-		lastPageTLI = 0;		/* see comment in ValidXLOGHeader */
+		lastPageTLI = 0;		/* see comment in ValidXLogPageHeader */
 		randAccess = true;		/* allow curFileTLI to go backwards too */
 	}
 
@@ -3822,77 +3835,17 @@ retry:
 						RecPtr->xlogid, RecPtr->xrecoff)));
 		goto next_record_is_invalid;
 	}
-	record = (XLogRecord *) ((char *) readBuf + RecPtr->xrecoff % XLOG_BLCKSZ);
 
 	/*
-	 * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
-	 * required.
+	 * NB: Even though we use an XLogRecord pointer here, the whole record
+	 * header might not fit on this page. xl_tot_len is the first field in
+	 * struct, so it must be on this page, but we cannot safely access any
+	 * other fields yet.
 	 */
-	if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
-	{
-		if (record->xl_len != 0)
-		{
-			ereport(emode_for_corrupt_record(emode, *RecPtr),
-					(errmsg("invalid xlog switch record at %X/%X",
-							RecPtr->xlogid, RecPtr->xrecoff)));
-			goto next_record_is_invalid;
-		}
-	}
-	else if (record->xl_len == 0)
-	{
-		ereport(emode_for_corrupt_record(emode, *RecPtr),
-				(errmsg("record with zero length at %X/%X",
-						RecPtr->xlogid, RecPtr->xrecoff)));
-		goto next_record_is_invalid;
-	}
-	if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
-		record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
-		XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
-	{
-		ereport(emode_for_corrupt_record(emode, *RecPtr),
-				(errmsg("invalid record length at %X/%X",
-						RecPtr->xlogid, RecPtr->xrecoff)));
-		goto next_record_is_invalid;
-	}
-	if (record->xl_rmid > RM_MAX_ID)
-	{
-		ereport(emode_for_corrupt_record(emode, *RecPtr),
-				(errmsg("invalid resource manager ID %u at %X/%X",
-						record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
-		goto next_record_is_invalid;
-	}
-	if (randAccess)
-	{
-		/*
-		 * We can't exactly verify the prev-link, but surely it should be less
-		 * than the record's own address.
-		 */
-		if (!XLByteLT(record->xl_prev, *RecPtr))
-		{
-			ereport(emode_for_corrupt_record(emode, *RecPtr),
-					(errmsg("record with incorrect prev-link %X/%X at %X/%X",
-							record->xl_prev.xlogid, record->xl_prev.xrecoff,
-							RecPtr->xlogid, RecPtr->xrecoff)));
-			goto next_record_is_invalid;
-		}
-	}
-	else
-	{
-		/*
-		 * Record's prev-link should exactly match our previous location. This
-		 * check guards against torn WAL pages where a stale but valid-looking
-		 * WAL record starts on a sector boundary.
-		 */
-		if (!XLByteEQ(record->xl_prev, ReadRecPtr))
-		{
-			ereport(emode_for_corrupt_record(emode, *RecPtr),
-					(errmsg("record with incorrect prev-link %X/%X at %X/%X",
-							record->xl_prev.xlogid, record->xl_prev.xrecoff,
-							RecPtr->xlogid, RecPtr->xrecoff)));
-			goto next_record_is_invalid;
-		}
-	}
+	record = (XLogRecord *) (readBuf + RecPtr->xrecoff % XLOG_BLCKSZ);
+	total_len = record->xl_tot_len;
 
+	/* Make sure the record buffer can hold the whole record. */
 	/*
 	 * Allocate or enlarge readRecordBuf as needed.  To avoid useless small
 	 * increases, round its size to a multiple of XLOG_BLCKSZ, and make sure
@@ -3900,16 +3853,17 @@ retry:
 	 * enough for all "normal" records, but very large commit or abort records
 	 * might need more space.)
 	 */
-	total_len = record->xl_tot_len;
 	if (total_len > readRecordBufSize)
 	{
 		uint32		newSize = total_len;
 
 		newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
 		newSize = Max(newSize, 4 * Max(BLCKSZ, XLOG_BLCKSZ));
-		if (readRecordBuf)
-			free(readRecordBuf);
-		readRecordBuf = (char *) malloc(newSize);
+		if (!readRecordBuf)
+			readRecordBuf = (char *) malloc(newSize);
+		else
+			readRecordBuf = (char *) realloc(readRecordBuf, newSize);
+
 		if (!readRecordBuf)
 		{
 			readRecordBufSize = 0;
@@ -3922,7 +3876,19 @@ retry:
 		readRecordBufSize = newSize;
 	}
 
-	buffer = readRecordBuf;
+	/*
+	 * If we got the whole header already, validate it immediately. Otherwise
+	 * we validate it after reading the rest of the header from the next page.
+	 */
+	if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
+	{
+		if (!ValidXLogRecordHeader(RecPtr, record, emode, randAccess))
+			goto next_record_is_invalid;
+		gotheader = true;
+	}
+	else
+		gotheader = false;
+
 	len = XLOG_BLCKSZ - RecPtr->xrecoff % XLOG_BLCKSZ;
 	if (total_len > len)
 	{
@@ -3930,16 +3896,19 @@ retry:
 		char	   *contrecord;
 		XLogPageHeader pageHeader;
 		XLogRecPtr	pagelsn;
-		uint32		gotlen = len;
+		char	   *buffer;
+		uint32		gotlen;
 
 		/* Initialize pagelsn to the beginning of the page this record is on */
 		pagelsn = *RecPtr;
 		pagelsn.xrecoff = (pagelsn.xrecoff / XLOG_BLCKSZ) * XLOG_BLCKSZ;
 
-		memcpy(buffer, record, len);
-		record = (XLogRecord *) buffer;
-		buffer += len;
-		for (;;)
+		/* Copy the first fragment of the record from the first page. */
+		memcpy(readRecordBuf, readBuf + RecPtr->xrecoff % XLOG_BLCKSZ, len);
+		buffer = readRecordBuf + len;
+		gotlen = len;
+
+		do
 		{
 			/* Calculate pointer to beginning of next page */
 			XLByteAdvance(pagelsn, XLOG_BLCKSZ);
@@ -3947,8 +3916,9 @@ retry:
 			if (!XLogPageRead(&pagelsn, emode, false, false))
 				return NULL;
 
-			/* Check that the continuation record looks valid */
-			if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
+			/* Check that the continuation on next page looks valid */
+			pageHeader = (XLogPageHeader) readBuf;
+			if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
 			{
 				ereport(emode_for_corrupt_record(emode, *RecPtr),
 						(errmsg("there is no contrecord flag in log segment %s, offset %u",
@@ -3956,14 +3926,13 @@ retry:
 								readOff)));
 				goto next_record_is_invalid;
 			}
-			pageHeader = (XLogPageHeader) readBuf;
-			pageHeaderSize = XLogPageHeaderSize(pageHeader);
-			contrecord = (char *) readBuf + pageHeaderSize;
+			/*
+			 * Cross-check that xlp_rem_len agrees with how much of the record
+			 * we expect there to be left.
+			 */
 			if (pageHeader->xlp_rem_len == 0 ||
 				total_len != (pageHeader->xlp_rem_len + gotlen))
 			{
-				char fname[MAXFNAMELEN];
-				XLogFileName(fname, curFileTLI, readSegNo);
 				ereport(emode_for_corrupt_record(emode, *RecPtr),
 						(errmsg("invalid contrecord length %u in log segment %s, offset %u",
 								pageHeader->xlp_rem_len,
@@ -3971,17 +3940,28 @@ retry:
 								readOff)));
 				goto next_record_is_invalid;
 			}
+
+			/* Append the continuation from this page to the buffer */
+			pageHeaderSize = XLogPageHeaderSize(pageHeader);
+			contrecord = (char *) readBuf + pageHeaderSize;
 			len = XLOG_BLCKSZ - pageHeaderSize;
-			if (pageHeader->xlp_rem_len > len)
+			if (pageHeader->xlp_rem_len < len)
+				len = pageHeader->xlp_rem_len;
+			memcpy(buffer, (char *) contrecord, len);
+			buffer += len;
+			gotlen += len;
+
+			/* If we just reassembled the record header, validate it. */
+			if (!gotheader)
 			{
-				memcpy(buffer, (char *) contrecord, len);
-				gotlen += len;
-				buffer += len;
-				continue;
+				record = (XLogRecord *) readRecordBuf;
+				if (!ValidXLogRecordHeader(RecPtr, record, emode, randAccess))
+					goto next_record_is_invalid;
+				gotheader = true;
 			}
-			memcpy(buffer, (char *) contrecord, pageHeader->xlp_rem_len);
-			break;
-		}
+		} while (pageHeader->xlp_rem_len > len);
+
+		record = (XLogRecord *) readRecordBuf;
 		if (!RecordIsValid(record, *RecPtr, emode))
 			goto next_record_is_invalid;
 		pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
@@ -3990,18 +3970,18 @@ retry:
 			readOff + pageHeaderSize + MAXALIGN(pageHeader->xlp_rem_len),
 			EndRecPtr);
 		ReadRecPtr = *RecPtr;
-		/* needn't worry about XLOG SWITCH, it can't cross page boundaries */
-		return record;
 	}
+	else
+	{
+		/* Record does not cross a page boundary */
+		if (!RecordIsValid(record, *RecPtr, emode))
+			goto next_record_is_invalid;
+		EndRecPtr.xlogid = RecPtr->xlogid;
+		EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
 
-	/* Record does not cross a page boundary */
-	if (!RecordIsValid(record, *RecPtr, emode))
-		goto next_record_is_invalid;
-	EndRecPtr.xlogid = RecPtr->xlogid;
-	EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
-
-	ReadRecPtr = *RecPtr;
-	memcpy(buffer, record, total_len);
+		ReadRecPtr = *RecPtr;
+		memcpy(readRecordBuf, record, total_len);
+	}
 
 	/*
 	 * Special processing if it's an XLOG SWITCH record
@@ -4019,7 +3999,7 @@ retry:
 		 */
 		readOff = XLogSegSize - XLOG_BLCKSZ;
 	}
-	return (XLogRecord *) buffer;
+	return record;
 
 next_record_is_invalid:
 	failedSources |= readSource;
@@ -4044,7 +4024,7 @@ next_record_is_invalid:
  * ReadRecord.	It's not intended for use from anywhere else.
  */
 static bool
-ValidXLOGHeader(XLogPageHeader hdr, int emode)
+ValidXLogPageHeader(XLogPageHeader hdr, int emode)
 {
 	XLogRecPtr	recaddr;
 
@@ -4163,6 +4143,88 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode)
 }
 
 /*
+ * Sanity-check the fields of a fully assembled XLOG record header located
+ * at *RecPtr.  Returns true if it looks valid; otherwise reports the problem
+ * via ereport() and returns false.  This is a convenience subroutine for
+ * ReadRecord (CRC is not checked here); not intended for use elsewhere.
+ */
+static bool
+ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode,
+					  bool randAccess)
+{
+	/*
+	 * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
+	 * required.
+	 */
+	if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
+	{
+		if (record->xl_len != 0)
+		{
+			ereport(emode_for_corrupt_record(emode, *RecPtr),
+					(errmsg("invalid xlog switch record at %X/%X",
+							RecPtr->xlogid, RecPtr->xrecoff)));
+			return false;
+		}
+	}
+	else if (record->xl_len == 0)
+	{
+		ereport(emode_for_corrupt_record(emode, *RecPtr),
+				(errmsg("record with zero length at %X/%X",
+						RecPtr->xlogid, RecPtr->xrecoff)));
+		return false;
+	}
+	if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
+		record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
+		XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+	{
+		ereport(emode_for_corrupt_record(emode, *RecPtr),
+				(errmsg("invalid record length at %X/%X",
+						RecPtr->xlogid, RecPtr->xrecoff)));
+		return false;
+	}
+	if (record->xl_rmid > RM_MAX_ID)
+	{
+		ereport(emode_for_corrupt_record(emode, *RecPtr),
+				(errmsg("invalid resource manager ID %u at %X/%X",
+						record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff)));
+		return false;
+	}
+	if (randAccess)
+	{
+		/*
+		 * Random access: we can't verify the prev-link exactly, but surely it
+		 * should be less than the record's own address.
+		 */
+		if (!XLByteLT(record->xl_prev, *RecPtr))
+		{
+			ereport(emode_for_corrupt_record(emode, *RecPtr),
+					(errmsg("record with incorrect prev-link %X/%X at %X/%X",
+							record->xl_prev.xlogid, record->xl_prev.xrecoff,
+							RecPtr->xlogid, RecPtr->xrecoff)));
+			return false;
+		}
+	}
+	else
+	{
+		/*
+		 * Sequential read: prev-link must exactly match ReadRecPtr, the start
+		 * of the previously read record. This guards against torn WAL pages
+		 * where a stale but valid-looking record starts on a sector boundary.
+		 */
+		if (!XLByteEQ(record->xl_prev, ReadRecPtr))
+		{
+			ereport(emode_for_corrupt_record(emode, *RecPtr),
+					(errmsg("record with incorrect prev-link %X/%X at %X/%X",
+							record->xl_prev.xlogid, record->xl_prev.xrecoff,
+							RecPtr->xlogid, RecPtr->xrecoff)));
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
  * Try to read a timeline's history file.
  *
  * If successful, return the list of component TLIs (the given TLI followed by
@@ -5171,8 +5233,7 @@ BootStrapXLOG(void)
 
 	INIT_CRC32(crc);
 	COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
-	COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
-			   SizeOfXLogRecord - sizeof(pg_crc32));
+	COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
 	FIN_CRC32(crc);
 	record->xl_crc = crc;
 
@@ -7707,7 +7768,7 @@ CreateCheckPoint(int flags)
 	 * checkpoint, even though physically before it.  Got that?
 	 */
 	freespace = INSERT_FREESPACE(Insert);
-	if (freespace < SizeOfXLogRecord)
+	if (freespace == 0)
 	{
 		(void) AdvanceXLInsertBuffer(false);
 		/* OK to ignore update return flag, since we will do flush anyway */
@@ -10269,7 +10330,7 @@ retry:
 							fname, readOff)));
 			goto next_record_is_invalid;
 		}
-		if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+		if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
 			goto next_record_is_invalid;
 	}
 
@@ -10295,7 +10356,7 @@ retry:
 				fname, readOff)));
 		goto next_record_is_invalid;
 	}
-	if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
+	if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
 		goto next_record_is_invalid;
 
 	Assert(targetSegNo == readSegNo);
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 0012cff..15f2b27 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -942,8 +942,7 @@ WriteEmptyXLOG(void)
 
 	INIT_CRC32(crc);
 	COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
-	COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
-			   SizeOfXLogRecord - sizeof(pg_crc32));
+	COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc));
 	FIN_CRC32(crc);
 	record->xl_crc = crc;
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index b581910..ec79870 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -40,15 +40,16 @@
  */
 typedef struct XLogRecord
 {
-	pg_crc32	xl_crc;			/* CRC for this record */
-	XLogRecPtr	xl_prev;		/* ptr to previous record in log */
-	TransactionId xl_xid;		/* xact id */
 	uint32		xl_tot_len;		/* total len of entire record */
+	TransactionId xl_xid;		/* xact id */
 	uint32		xl_len;			/* total len of rmgr data */
 	uint8		xl_info;		/* flag bits, see below */
 	RmgrId		xl_rmid;		/* resource manager for this record */
+	/* 2 bytes of padding here, initialize to zero */
+	XLogRecPtr	xl_prev;		/* ptr to previous record in log */
+	pg_crc32	xl_crc;			/* CRC for this record */
 
-	/* Depending on MAXALIGN, there are either 2 or 6 wasted bytes here */
+	/* If MAXALIGN==8, there are 4 wasted bytes here */
 
 	/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
 
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 239b749..a958856 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -63,9 +63,7 @@ typedef struct XLogPageHeaderData
 	/*
 	 * When there is not enough space on current page for whole record, we
 	 * continue on the next page.  xlp_rem_len is the number of bytes
-	 * remaining from a previous page. (However, the XLogRecord header will
-	 * never be split across pages; if there's less than SizeOfXLogRecord
-	 * space left at the end of a page, we just waste it.)
+	 * remaining from a previous page.
 	 *
 	 * Note that xl_rem_len includes backup-block data; that is, it tracks
 	 * xl_tot_len not xl_len in the initial header.  Also note that the
