From 0692b4c88a8ffa3ffd8f5e083745d616ed152a6f Mon Sep 17 00:00:00 2001
From: Craig Ringer <craig@2ndquadrant.com>
Date: Tue, 23 Feb 2016 16:00:09 +0800
Subject: [PATCH 3/8] Retain extra WAL for failover slots in base backups

Change the return value of pg_start_backup(), the BASE_BACKUP walsender
command, etc. to report the minimum LSN required by any failover slot
when that LSN is lower than the redo position, so that base backups
contain the WAL required for failover slots to work.

Add a new backup label entry 'MIN FAILOVER SLOT LSN' that, if present,
indicates the minimum LSN needed by any failover slot that is present in
the base backup. Backup tools should check for this entry and ensure
they retain all WAL from that point onward, inclusive.
---
 src/backend/access/transam/xlog.c | 40 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a92f09d..74b7b23 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -9797,6 +9797,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 	bool		backup_started_in_recovery = false;
 	XLogRecPtr	checkpointloc;
 	XLogRecPtr	startpoint;
+	XLogRecPtr  slot_startpoint;
 	TimeLineID	starttli;
 	pg_time_t	stamp_time;
 	char		strfbuf[128];
@@ -9943,6 +9944,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 			checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
 			LWLockRelease(ControlFileLock);
 
+			/*
+			 * If failover slots are in use we must retain and transfer WAL
+			 * older than the redo location so that those slots can be replayed
+			 * from after a failover event.
+			 *
+			 * This MUST be at an xlog segment boundary so truncate the LSN
+			 * appropriately.
+			 */
+			slot_startpoint = (ReplicationSlotsComputeRequiredLSN(true)/ XLOG_SEG_SIZE) * XLOG_SEG_SIZE;
+
 			if (backup_started_in_recovery)
 			{
 				XLogRecPtr	recptr;
@@ -10111,6 +10122,10 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 						 backup_started_in_recovery ? "standby" : "master");
 		appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf);
 		appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr);
+		if (slot_startpoint != InvalidXLogRecPtr)
+			appendStringInfo(&labelfbuf,  "MIN FAILOVER SLOT LSN: %X/%X\n",
+						(uint32)(slot_startpoint>>32), (uint32)slot_startpoint);
+
 
 		/*
 		 * Okay, write the file, or return its contents to caller.
@@ -10204,9 +10219,34 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
 
 	/*
 	 * We're done.  As a convenience, return the starting WAL location.
+	 *
+	 * pg_basebackup etc expect to use this as the position to start copying
+	 * WAL from, so we should return the minimum of the slot start LSN and the
+	 * current redo position to make sure we get all WAL required by failover
+	 * slots.
+	 *
+	 * The min required LSN for failover slots is also available from the
+	 * 'MIN FAILOVER SLOT LSN' entry in the backup label file.
 	 */
+	if (slot_startpoint != InvalidXLogRecPtr && slot_startpoint < startpoint)
+	{
+		List *history;
+		TimeLineID slot_start_tli;
+
+		/* Min LSN required by a slot may be on an older timeline. */
+		history = readTimeLineHistory(ThisTimeLineID);
+		slot_start_tli = tliOfPointInHistory(slot_startpoint, history);
+		list_free_deep(history);
+
+		if (slot_start_tli < starttli)
+			starttli = slot_start_tli;
+
+		startpoint = slot_startpoint;
+	}
+
 	if (starttli_p)
 		*starttli_p = starttli;
+
 	return startpoint;
 }
 
-- 
2.1.0

