*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 6095,6102 **** StartupXLOG(void)
  			StartupSUBTRANS(oldestActiveXID);
  			StartupMultiXact();
  
- 			ProcArrayInitRecoveryInfo(oldestActiveXID);
- 
  			/*
  			 * If we're beginning at a shutdown checkpoint, we know that
  			 * nothing was running on the master at this point. So fake-up an
--- 6095,6100 ----
*** a/src/backend/storage/ipc/procarray.c
--- b/src/backend/storage/ipc/procarray.c
***************
*** 435,453 **** ProcArrayClearTransaction(PGPROC *proc)
  }
  
  /*
-  * ProcArrayInitRecoveryInfo
-  *
-  * When trying to assemble our snapshot we only care about xids after this value.
-  * See comments for LogStandbySnapshot().
-  */
- void
- ProcArrayInitRecoveryInfo(TransactionId oldestActiveXid)
- {
- 	latestObservedXid = oldestActiveXid;
- 	TransactionIdRetreat(latestObservedXid);
- }
- 
- /*
   * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
   *
   * Takes us through 3 states: Initialized, Pending and Ready.
--- 435,440 ----
***************
*** 519,533 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  	Assert(standbyState == STANDBY_INITIALIZED);
  
  	/*
! 	 * OK, we need to initialise from the RunningTransactionsData record
! 	 */
! 
! 	/*
! 	 * Remove all xids except xids later than the snapshot. We don't know
! 	 * exactly which ones that is until precisely now, so that is why we allow
! 	 * xids to be added only to remove most of them again here.
  	 */
- 	ExpireOldKnownAssignedTransactionIds(running->nextXid);
  	StandbyReleaseOldLocks(running->nextXid);
  
  	/*
--- 506,514 ----
  	Assert(standbyState == STANDBY_INITIALIZED);
  
  	/*
! 	 * Release any locks belonging to old transactions that are not
! 	 * running according to the running-xacts record.
  	 */
  	StandbyReleaseOldLocks(running->nextXid);
  
  	/*
***************
*** 536,544 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
  
  	/*
- 	 * Combine the running xact data with already known xids, if any exist.
  	 * KnownAssignedXids is sorted so we cannot just add new xids, we have to
! 	 * combine them first, sort them and then re-add to KnownAssignedXids.
  	 *
  	 * Some of the new xids are top-level xids and some are subtransactions.
  	 * We don't call SubtransSetParent because it doesn't matter yet. If we
--- 517,524 ----
  	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
  
  	/*
  	 * KnownAssignedXids is sorted so we cannot just add new xids, we have to
! 	 * sort them first.
  	 *
  	 * Some of the new xids are top-level xids and some are subtransactions.
  	 * We don't call SubtransSetParent because it doesn't matter yet. If we
***************
*** 547,597 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  	 * xids to subtrans. If RunningXacts is overflowed then we don't have
  	 * enough information to correctly update subtrans anyway.
  	 */
  
  	/*
! 	 * Allocate a temporary array so we can combine xids. The total of both
! 	 * arrays should never normally exceed TOTAL_MAX_CACHED_SUBXIDS.
  	 */
! 	xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS);
  
  	/*
! 	 * Get the remaining KnownAssignedXids. In most cases there won't be any
! 	 * at all since this exists only to catch a theoretical race condition.
! 	 */
! 	nxids = KnownAssignedXidsGet(xids, InvalidTransactionId);
! 	if (nxids > 0)
! 		KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
! 
! 	/*
! 	 * Now we have a copy of any KnownAssignedXids we can zero the array
! 	 * before we re-insert combined snapshot.
! 	 */
! 	KnownAssignedXidsRemovePreceding(InvalidTransactionId);
! 
! 	/*
! 	 * Add to the temp array any xids which have not already completed, taking
! 	 * care not to overflow in extreme cases.
  	 */
  	for (i = 0; i < running->xcnt; i++)
  	{
  		TransactionId xid = running->xids[i];
  
  		/*
! 		 * The running-xacts snapshot can contain xids that were running at
! 		 * the time of the snapshot, yet complete before the snapshot was
! 		 * written to WAL. They're running now, so ignore them.
  		 */
  		if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
  			continue;
  
  		xids[nxids++] = xid;
- 
- 		/*
- 		 * Test for overflow only after we have filtered out already complete
- 		 * transactions.
- 		 */
- 		if (nxids > TOTAL_MAX_CACHED_SUBXIDS)
- 			elog(ERROR, "too many xids to add into KnownAssignedXids");
  	}
  
  	if (nxids > 0)
--- 527,558 ----
  	 * xids to subtrans. If RunningXacts is overflowed then we don't have
  	 * enough information to correctly update subtrans anyway.
  	 */
+ 	Assert(procArray->numKnownAssignedXids == 0);
  
  	/*
! 	 * Allocate a temporary array to avoid modifying the array passed as
! 	 * argument.
  	 */
! 	xids = palloc(sizeof(TransactionId) * running->xcnt);
  
  	/*
! 	 * Add to the temp array any xids which have not already completed.
  	 */
+ 	nxids = 0;
  	for (i = 0; i < running->xcnt; i++)
  	{
  		TransactionId xid = running->xids[i];
  
  		/*
! 		 * The running-xacts snapshot can contain xids that were still visible
! 		 * in the procarray when the snapshot was taken, but were already
! 		 * WAL-logged as completed. They're not running anymore, so ignore
! 		 * them.
  		 */
  		if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
  			continue;
  
  		xids[nxids++] = xid;
  	}
  
  	if (nxids > 0)
***************
*** 603,621 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  		qsort(xids, nxids, sizeof(TransactionId), xidComparator);
  
  		/*
- 		 * Re-initialise latestObservedXid to the highest xid we've seen.
- 		 */
- 		latestObservedXid = xids[nxids - 1];
- 
- 		/*
  		 * Add the sorted snapshot into KnownAssignedXids
  		 */
  		for (i = 0; i < nxids; i++)
! 		{
! 			TransactionId xid = xids[i];
! 
! 			KnownAssignedXidsAdd(xid, xid, true);
! 		}
  
  		KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
  	}
--- 564,573 ----
  		qsort(xids, nxids, sizeof(TransactionId), xidComparator);
  
  		/*
  		 * Add the sorted snapshot into KnownAssignedXids
  		 */
  		for (i = 0; i < nxids; i++)
! 			KnownAssignedXidsAdd(xids[i], xids[i], true);
  
  		KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
  	}
***************
*** 623,630 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  	pfree(xids);
  
  	/*
! 	 * Now we've got the running xids we need to set the global values thare
! 	 * used to track snapshots as they evolve further
  	 *
  	 * * latestCompletedXid which will be the xmax for snapshots *
  	 * lastOverflowedXid which shows whether snapshots overflow * nextXid
--- 575,582 ----
  	pfree(xids);
  
  	/*
! 	 * Now we've got the running xids we need to set the global values that
! 	 * are used to track snapshots as they evolve further.
  	 *
  	 * * latestCompletedXid which will be the xmax for snapshots *
  	 * lastOverflowedXid which shows whether snapshots overflow * nextXid
***************
*** 633,668 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
  	 * but the recovery snapshot isn't fully valid yet because we know there
  	 * are some subxids missing. We don't know the specific subxids that are
  	 * missing, so conservatively assume the last one is latestObservedXid.
- 	 * If no missing subxids, try to clear lastOverflowedXid.
- 	 *
- 	 * If the snapshot didn't overflow it's still possible that an overflow
- 	 * occurred in the gap between taking snapshot and logging record, so we
- 	 * also need to check if lastOverflowedXid is already ahead of us.
  	 */
  	if (running->subxid_overflow)
  	{
  		standbyState = STANDBY_SNAPSHOT_PENDING;
  
  		standbySnapshotPendingXmin = latestObservedXid;
! 		if (TransactionIdFollows(latestObservedXid,
! 								 procArray->lastOverflowedXid))
! 			procArray->lastOverflowedXid = latestObservedXid;
! 	}
! 	else if (TransactionIdFollows(procArray->lastOverflowedXid,
! 								  latestObservedXid))
! 	{
! 		standbyState = STANDBY_SNAPSHOT_PENDING;
! 
! 		standbySnapshotPendingXmin = procArray->lastOverflowedXid;
  	}
  	else
  	{
  		standbyState = STANDBY_SNAPSHOT_READY;
  
  		standbySnapshotPendingXmin = InvalidTransactionId;
! 		if (TransactionIdFollows(running->oldestRunningXid,
! 								 procArray->lastOverflowedXid))
! 			procArray->lastOverflowedXid = InvalidTransactionId;
  	}
  
  	/*
--- 585,607 ----
  	 * but the recovery snapshot isn't fully valid yet because we know there
  	 * are some subxids missing. We don't know the specific subxids that are
  	 * missing, so conservatively assume the last one is latestObservedXid.
  	 */
+ 	latestObservedXid = running->nextXid;
+ 	TransactionIdRetreat(latestObservedXid);
+ 
  	if (running->subxid_overflow)
  	{
  		standbyState = STANDBY_SNAPSHOT_PENDING;
  
  		standbySnapshotPendingXmin = latestObservedXid;
! 		procArray->lastOverflowedXid = latestObservedXid;
  	}
  	else
  	{
  		standbyState = STANDBY_SNAPSHOT_READY;
  
  		standbySnapshotPendingXmin = InvalidTransactionId;
! 		procArray->lastOverflowedXid = InvalidTransactionId;
  	}
  
  	/*
***************
*** 1407,1412 **** GetSnapshotData(Snapshot snapshot)
--- 1346,1355 ----
   * Similar to GetSnapshotData but returns more information. We include
   * all PGPROCs with an assigned TransactionId, even VACUUM processes.
   *
+  * We acquire XidGenLock, but the caller is responsible for releasing it.
+  * This ensures that no new XIDs enter the proc array until the caller has
+  * WAL-logged this snapshot and released the lock.
+  *
   * The returned data structure is statically allocated; caller should not
   * modify it, and must not assume it is valid past the next call.
   *
***************
*** 1526,1532 **** GetRunningTransactionData(void)
  	CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
  	CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
  
! 	LWLockRelease(XidGenLock);
  	LWLockRelease(ProcArrayLock);
  
  	Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
--- 1469,1475 ----
  	CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
  	CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
  
! 	/* We don't release XidGenLock here, the caller is responsible for that */
  	LWLockRelease(ProcArrayLock);
  
  	Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
***************
*** 2337,2346 **** DisplayXidCache(void)
   *		unobserved XIDs.
   *
   * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
!  * type apart from XLOG_RUNNING_XACTS (since that initialises the first
!  * snapshot so that RecordKnownAssignedTransactionIds() can be called). Must
!  * be called for each record after we have executed StartupCLOG() et al,
!  * since we must ExtendCLOG() etc..
   *
   * Called during recovery in analogy with and in place of GetNewTransactionId()
   */
--- 2280,2287 ----
   *		unobserved XIDs.
   *
   * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
!  * associated with a transaction. Must be called for each record after we
!  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.
   *
   * Called during recovery in analogy with and in place of GetNewTransactionId()
   */
***************
*** 2348,2360 **** void
  RecordKnownAssignedTransactionIds(TransactionId xid)
  {
  	Assert(standbyState >= STANDBY_INITIALIZED);
- 	Assert(TransactionIdIsValid(latestObservedXid));
  	Assert(TransactionIdIsValid(xid));
  
  	elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
  		 xid, latestObservedXid);
  
  	/*
  	 * When a newly observed xid arrives, it is frequently the case that it is
  	 * *not* the next xid in sequence. When this occurs, we must treat the
  	 * intervening xids as running also.
--- 2289,2308 ----
  RecordKnownAssignedTransactionIds(TransactionId xid)
  {
  	Assert(standbyState >= STANDBY_INITIALIZED);
  	Assert(TransactionIdIsValid(xid));
  
  	elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
  		 xid, latestObservedXid);
  
  	/*
+ 	 * If the KnownAssignedXids machinery isn't up yet, do nothing.
+ 	 */
+ 	if (standbyState == STANDBY_INITIALIZED)
+ 		return;
+ 
+ 	Assert(TransactionIdIsValid(latestObservedXid));
+ 
+ 	/*
  	 * When a newly observed xid arrives, it is frequently the case that it is
  	 * *not* the next xid in sequence. When this occurs, we must treat the
  	 * intervening xids as running also.
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 671,677 **** StandbyReleaseAllLocks(void)
  /*
   * StandbyReleaseOldLocks
   *		Release standby locks held by XIDs < removeXid, as long
!  *		as their not prepared transactions.
   */
  void
  StandbyReleaseOldLocks(TransactionId removeXid)
--- 671,677 ----
  /*
   * StandbyReleaseOldLocks
   *		Release standby locks held by XIDs < removeXid, as long
!  *		as they're not prepared transactions.
   */
  void
  StandbyReleaseOldLocks(TransactionId removeXid)
***************
*** 848,861 **** LogStandbySnapshot(TransactionId *oldestActiveXid, TransactionId *nextXid)
  	 * record we write, because standby will open up when it sees this.
  	 */
  	running = GetRunningTransactionData();
- 
- 	/*
- 	 * The gap between GetRunningTransactionData() and
- 	 * LogCurrentRunningXacts() is what most of the fuss is about here, so
- 	 * artifically extending this interval is a great way to test the little
- 	 * used parts of the code.
- 	 */
  	LogCurrentRunningXacts(running);
  
  	*oldestActiveXid = running->oldestRunningXid;
  	*nextXid = running->nextXid;
--- 848,856 ----
  	 * record we write, because standby will open up when it sees this.
  	 */
  	running = GetRunningTransactionData();
  	LogCurrentRunningXacts(running);
+ 	/* GetRunningTransactionData() acquired XidGenLock, we must release it */
+ 	LWLockRelease(XidGenLock);
  
  	*oldestActiveXid = running->oldestRunningXid;
  	*nextXid = running->nextXid;
*** a/src/include/storage/procarray.h
--- b/src/include/storage/procarray.h
***************
*** 28,34 **** extern void ProcArrayRemove(PGPROC *proc, TransactionId latestXid);
  extern void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid);
  extern void ProcArrayClearTransaction(PGPROC *proc);
  
- extern void ProcArrayInitRecoveryInfo(TransactionId oldestActiveXid);
  extern void ProcArrayApplyRecoveryInfo(RunningTransactions running);
  extern void ProcArrayApplyXidAssignment(TransactionId topxid,
  							int nsubxids, TransactionId *subxids);
--- 28,33 ----
