>From 463cdb627c47b2e3945ae87fb6f594252be3c570 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 19 Aug 2013 13:24:30 +0200
Subject: [PATCH 2/8] wal_decoding: Log xl_running_xact's at a higher frequency
 than checkpoints are done

Logging information about running xacts more frequently is beneficial both for
hot standby, which can reach consistency faster and release some resources
earlier using this information, and for future logical replication, which can
initialize more quickly using it.

Do so in the background writer, which seems to be the best choice as it's
regularly running and shouldn't be busy for too long without getting back into
its main loop.

Also mark xl_running_xact records as being relevant for async commit so the wal
writer writes them out soonish instead of possibly waiting a long time.
---
 src/backend/postmaster/bgwriter.c | 62 +++++++++++++++++++++++++++++++++++++++
 src/backend/storage/ipc/standby.c | 27 ++++++++++++++---
 src/include/storage/standby.h     |  2 +-
 3 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 286ae86..13d57c5 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -54,9 +54,11 @@
 #include "storage/shmem.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
+#include "storage/standby.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
+#include "utils/timestamp.h"
 
 
 /*
@@ -71,6 +73,20 @@ int			BgWriterDelay = 200;
 #define HIBERNATE_FACTOR			50
 
 /*
+ * Interval in which standby snapshots are logged into the WAL stream, in
+ * milliseconds.
+ */
+#define LOG_SNAPSHOT_INTERVAL_MS 15000
+
+/*
+ * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
+ * doing so too often or repeatedly if there has been no other write activity
+ * in the system.
+ */
+static TimestampTz last_snapshot_ts;
+static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
+
+/*
  * Flags set by interrupt handlers for later service in the main loop.
  */
 static volatile sig_atomic_t got_SIGHUP = false;
@@ -142,6 +158,12 @@ BackgroundWriterMain(void)
 	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");
 
 	/*
+	 * We just started, assume there has been either a shutdown or
+	 * end-of-recovery snapshot.
+	 */
+	last_snapshot_ts = GetCurrentTimestamp();
+
+	/*
 	 * Create a memory context that we will do all our work in.  We do this so
 	 * that we can reset the context during error recovery and thereby avoid
 	 * possible memory leaks.  Formerly this code just ran in
@@ -276,6 +298,46 @@ BackgroundWriterMain(void)
 		}
 
 		/*
+		 * Log a new xl_running_xacts every now and then so replication can get
+		 * into a consistent state faster (think of suboverflowed snapshots)
+		 * and clean up resources (locks, KnownXids*) more frequently. The
+		 * costs of this are relatively low, so doing it 4 times
+		 * (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
+		 *
+		 * We assume the interval for writing xl_running_xacts is
+		 * significantly bigger than BgWriterDelay, so we don't complicate the
+		 * overall timeout handling but just assume we're going to get called
+		 * often enough even if hibernation mode is active. It's not that
+		 * important that LOG_SNAPSHOT_INTERVAL_MS is met strictly. To make
+		 * sure we're not waking the disk up unnecessarily on an idle system,
+		 * we check whether any WAL has been inserted since the last time we
+		 * logged a running-xacts record.
+		 *
+		 * We do this logging in the bgwriter as it's the only process that
+		 * runs regularly and returns to its main loop all the time. The
+		 * checkpointer, for example, is barely ever in its main loop when
+		 * active, which would make it hard to log regularly.
+		 */
+		if (XLogStandbyInfoActive() && !RecoveryInProgress())
+		{
+			TimestampTz timeout = 0;
+			TimestampTz now = GetCurrentTimestamp();
+			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
+												  LOG_SNAPSHOT_INTERVAL_MS);
+
+			/*
+			 * Only log if enough time has passed and some xlog record has been
+			 * inserted since the last snapshot.
+			 */
+			if (now >= timeout &&
+				last_snapshot_lsn != GetXLogInsertRecPtr())
+			{
+				last_snapshot_lsn = LogStandbySnapshot();
+				last_snapshot_ts = now;
+			}
+		}
+
+		/*
 		 * Sleep until we are signaled or BgWriterDelay has elapsed.
 		 *
 		 * Note: the feedback control loop in BgBufferSync() expects that we
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index c704412..97da1a0 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -42,7 +42,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
 									   ProcSignalReason reason);
 static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
-static void LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
+static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
 
 
@@ -853,10 +853,13 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record)
  * currently running xids, performed by StandbyReleaseOldLocks().
  * Zero xids should no longer be possible, but we may be replaying WAL
  * from a time when they were possible.
+ *
+ * Returns the RecPtr of the last inserted record.
  */
-void
+XLogRecPtr
 LogStandbySnapshot(void)
 {
+	XLogRecPtr recptr;
 	RunningTransactions running;
 	xl_standby_lock *locks;
 	int			nlocks;
@@ -876,9 +879,12 @@ LogStandbySnapshot(void)
 	 * record we write, because standby will open up when it sees this.
 	 */
 	running = GetRunningTransactionData();
-	LogCurrentRunningXacts(running);
+	recptr = LogCurrentRunningXacts(running);
+
 	/* GetRunningTransactionData() acquired XidGenLock, we must release it */
 	LWLockRelease(XidGenLock);
+
+	return recptr;
 }
 
 /*
@@ -889,7 +895,7 @@ LogStandbySnapshot(void)
  * is a contiguous chunk of memory and never exists fully until it is
  * assembled in WAL.
  */
-static void
+static XLogRecPtr
 LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 {
 	xl_running_xacts xlrec;
@@ -939,6 +945,19 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 			 CurrRunningXacts->oldestRunningXid,
 			 CurrRunningXacts->latestCompletedXid,
 			 CurrRunningXacts->nextXid);
+
+	/*
+	 * Ensure running_xacts information is synced to disk not too far in the
+	 * future. We don't want to stall anything though (i.e. use XLogFlush()),
+	 * so we let the wal writer do it during normal
+	 * operation. XLogSetAsyncXactLSN() conveniently will mark the LSN as
+	 * to-be-synced and nudge the WALWriter into action if sleeping. Check
+	 * XLogBackgroundFlush() for details why a record might not be flushed
+	 * without it.
+	 */
+	XLogSetAsyncXactLSN(recptr);
+
+	return recptr;
 }
 
 /*
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 7f3f051..d4a8fe4 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -113,6 +113,6 @@ typedef RunningTransactionsData *RunningTransactions;
 extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid);
 extern void LogAccessExclusiveLockPrepare(void);
 
-extern void LogStandbySnapshot(void);
+extern XLogRecPtr LogStandbySnapshot(void);
 
 #endif   /* STANDBY_H */
-- 
1.8.4.21.g992c386.dirty

