*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 4763,4768 **** heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
--- 4763,4802 ----
  	UnlockReleaseBuffer(buffer);
  }
  
+ static void
+ heap_xlog_copyrelfile(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_heap_copyrelfile *xlrec = (xl_heap_copyrelfile *) XLogRecGetData(record);
+ 
+ 	/*
+ 	 * No conflict processing required for XLOG_HEAP2_COPYFILE records
+ 	 */
+ 
+ 	/*
+ 	 * If this is just crash recovery then we already synced the
+ 	 * changes to disk, so we don't need to replay the copy. That
+ 	 * is equivalent to skipping writing WAL in other cases.
+ 	 */
+ 	if (InArchiveRecovery)
+ 	{
+ 		SMgrRelation src = smgropen(xlrec->node);
+ 		SMgrRelation dst = smgropen(xlrec->dstnode);
+ 
+ 		Relation srcrel = CreateFakeRelcacheEntry(src->smgr_rnode);
+ 
+ 		/*
+ 		 * Flush all buffers for the source relfilenode before
+ 		 * we do the copy outside of shared buffers.
+ 		 */
+ 		FlushRelationBuffers(srcrel);
+ 
+ 		/*
+ 		 * Replay the copy and sync
+ 		 */
+ 		heap_copy_relfilenode(src, dst, xlrec->forkNum, false);
+ 	}
+ }
+ 
  void
  heap_redo(XLogRecPtr lsn, XLogRecord *record)
  {
***************
*** 4824,4829 **** heap2_redo(XLogRecPtr lsn, XLogRecord *record)
--- 4858,4866 ----
  		case XLOG_HEAP2_CLEANUP_INFO:
  			heap_xlog_cleanup_info(lsn, record);
  			break;
+ 		case XLOG_HEAP2_COPYFILE:
+ 			heap_xlog_copyrelfile(lsn, record);
+ 			break;
  		default:
  			elog(PANIC, "heap2_redo: unknown op code %u", info);
  	}
***************
*** 4952,4961 **** heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
--- 4989,5090 ----
  		appendStringInfo(buf, "cleanup info: remxid %u",
  						 xlrec->latestRemovedXid);
  	}
+ 	else if (info == XLOG_HEAP2_COPYFILE)
+ 	{
+ 		xl_heap_copyrelfile *xlrec = (xl_heap_copyrelfile *) rec;
+ 
+ 		appendStringInfo(buf, "copy relfilenode: source rel %u/%u/%u;"
+ 							" destination rel %u/%u/%u; fork %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode,
+ 						 xlrec->dstnode.spcNode, xlrec->dstnode.dbNode,
+ 						 xlrec->dstnode.relNode,
+ 						 xlrec->forkNum);
+ 	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
  }
  
+ static void
+ log_copyrelfiles(SMgrRelation src, SMgrRelation dst,
+ 				   ForkNumber forkNum)
+ {
+ 	xl_heap_copyrelfile xlrec;
+ 	XLogRecPtr	recptr;
+ 	XLogRecData rdata[1];
+ 
+ 	START_CRIT_SECTION();
+ 
+ 	xlrec.node = src->smgr_rnode;
+ 	xlrec.dstnode = dst->smgr_rnode;
+ 	xlrec.forkNum = forkNum;
+ 
+ 	rdata[0].data = (char *) &xlrec;
+ 	rdata[0].len = SizeOfHeapCopyRelFile;
+ 	rdata[0].buffer = InvalidBuffer;
+ 	rdata[0].next = NULL;
+ 
+ 	recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_COPYFILE, rdata);
+ 
+ 	END_CRIT_SECTION();
+ }
+ 
+ /*
+  *	heap_copy_relfilenode		- copy source to destination relfilenode
+  *
+  * Copy data, block by block and then sync the file. All done outside
+  * shared buffers, so can only be executed with AccessExclusiveLock.
+  *
+  * Both smgrs must already exist
+  */
+ void
+ heap_copy_relfilenode(SMgrRelation src, SMgrRelation dst,
+ 				   ForkNumber forkNum, bool istemp)
+ {
+ 	BlockNumber nblocks;
+ 	BlockNumber blkno;
+ 	char		buf[BLCKSZ];
+ 
+ 	/*
+ 	 * We log a single instruction to truncate the destination and copy
+ 	 * from source, to allow us to avoid writing WAL for every block.
+ 	 * The WAL message is a no-op unless we are InArchiveRecovery.
+ 	 */
+ 	if (!InRecovery && !istemp)
+ 		log_copyrelfiles(src, dst, forkNum);
+ 
+ 	nblocks = smgrnblocks(src, forkNum);
+ 
+ 	for (blkno = 0; blkno < nblocks; blkno++)
+ 	{
+ 		smgrread(src, forkNum, blkno, buf);
+ 
+ 		/*
+ 		 * Now write the page.	We say isTemp = true even if it's not a temp
+ 		 * rel, because there's no need for smgr to schedule an fsync for this
+ 		 * write; we'll do it ourselves below.
+ 		 */
+ 		smgrextend(dst, forkNum, blkno, buf, true);
+ 	}
+ 
+ 	/*
+ 	 * If the rel isn't temp, we must fsync it down to disk before it's safe
+ 	 * to commit the transaction.  (For a temp rel we don't care since the rel
+ 	 * will be uninteresting after a crash anyway.)
+ 	 *
+ 	 * It's obvious that we must do this when not WAL-logging the copy. It's
+ 	 * less obvious that we have to do it even if we did WAL-log the copied
+ 	 * pages. The reason is that since we're copying outside shared buffers, a
+ 	 * CHECKPOINT occurring during the copy has no way to flush the previously
+ 	 * written data to disk (indeed it won't know the new rel even exists).  A
+ 	 * crash later on would replay WAL from the checkpoint, therefore it
+ 	 * wouldn't replay our earlier WAL entries. If we do not fsync those pages
+ 	 * here, they might still not be on disk when the crash occurs.
+ 	 */
+ 	if (!istemp)
+ 		smgrimmedsync(dst, forkNum);
+ }
+ 
  /*
   *	heap_sync		- sync a heap, for use when no WAL has been written
   *
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 163,170 **** static bool LocalRecoveryInProgress = true;
   */
  static int	LocalXLogInsertAllowed = -1;
  
! /* Are we recovering using offline XLOG archives? */
! static bool InArchiveRecovery = false;
  
  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;
--- 163,170 ----
   */
  static int	LocalXLogInsertAllowed = -1;
  
! /* Are we doing more than a crash recovery? */
! bool InArchiveRecovery = false;
  
  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 329,336 **** static void ATExecEnableDisableRule(Relation rel, char *rulename,
  						char fires_when);
  static void ATExecAddInherit(Relation rel, RangeVar *parent);
  static void ATExecDropInherit(Relation rel, RangeVar *parent);
- static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
- 				   ForkNumber forkNum, bool istemp);
  static const char *storage_name(char c);
  
  
--- 329,334 ----
***************
*** 6995,7001 **** ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
  	RelationCreateStorage(newrnode, rel->rd_istemp);
  
  	/* copy main fork */
! 	copy_relation_data(rel->rd_smgr, dstrel, MAIN_FORKNUM, rel->rd_istemp);
  
  	/* copy those extra forks that exist */
  	for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++)
--- 6993,6999 ----
  	RelationCreateStorage(newrnode, rel->rd_istemp);
  
  	/* copy main fork */
! 	heap_copy_relfilenode(rel->rd_smgr, dstrel, MAIN_FORKNUM, rel->rd_istemp);
  
  	/* copy those extra forks that exist */
  	for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++)
***************
*** 7003,7009 **** ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
  		if (smgrexists(rel->rd_smgr, forkNum))
  		{
  			smgrcreate(dstrel, forkNum, false);
! 			copy_relation_data(rel->rd_smgr, dstrel, forkNum, rel->rd_istemp);
  		}
  	}
  
--- 7001,7007 ----
  		if (smgrexists(rel->rd_smgr, forkNum))
  		{
  			smgrcreate(dstrel, forkNum, false);
! 			heap_copy_relfilenode(rel->rd_smgr, dstrel, forkNum, rel->rd_istemp);
  		}
  	}
  
***************
*** 7021,7040 **** ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
  
  	heap_close(pg_class, RowExclusiveLock);
  
- 	/*
- 	 * Write an XLOG UNLOGGED record if WAL-logging was skipped because WAL
- 	 * archiving is not enabled.
- 	 */
- 	if (!XLogIsNeeded() && !rel->rd_istemp)
- 	{
- 		char		reason[NAMEDATALEN + 40];
- 
- 		snprintf(reason, sizeof(reason), "ALTER TABLE SET TABLESPACE on \"%s\"",
- 				 RelationGetRelationName(rel));
- 
- 		XLogReportUnloggedStatement(reason);
- 	}
- 
  	relation_close(rel, NoLock);
  
  	/* Make sure the reltablespace change is visible */
--- 7019,7024 ----
***************
*** 7048,7112 **** ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
  }
  
  /*
-  * Copy data, block by block
-  */
- static void
- copy_relation_data(SMgrRelation src, SMgrRelation dst,
- 				   ForkNumber forkNum, bool istemp)
- {
- 	bool		use_wal;
- 	BlockNumber nblocks;
- 	BlockNumber blkno;
- 	char		buf[BLCKSZ];
- 	Page		page = (Page) buf;
- 
- 	/*
- 	 * We need to log the copied data in WAL iff WAL archiving/streaming is
- 	 * enabled AND it's not a temp rel.
- 	 *
- 	 * Note: If you change the conditions here, update the conditions in
- 	 * ATExecSetTableSpace() for when an XLOG UNLOGGED record is written to
- 	 * match.
- 	 */
- 	use_wal = XLogIsNeeded() && !istemp;
- 
- 	nblocks = smgrnblocks(src, forkNum);
- 
- 	for (blkno = 0; blkno < nblocks; blkno++)
- 	{
- 		smgrread(src, forkNum, blkno, buf);
- 
- 		/* XLOG stuff */
- 		if (use_wal)
- 			log_newpage(&dst->smgr_rnode, forkNum, blkno, page);
- 
- 		/*
- 		 * Now write the page.	We say isTemp = true even if it's not a temp
- 		 * rel, because there's no need for smgr to schedule an fsync for this
- 		 * write; we'll do it ourselves below.
- 		 */
- 		smgrextend(dst, forkNum, blkno, buf, true);
- 	}
- 
- 	/*
- 	 * If the rel isn't temp, we must fsync it down to disk before it's safe
- 	 * to commit the transaction.  (For a temp rel we don't care since the rel
- 	 * will be uninteresting after a crash anyway.)
- 	 *
- 	 * It's obvious that we must do this when not WAL-logging the copy. It's
- 	 * less obvious that we have to do it even if we did WAL-log the copied
- 	 * pages. The reason is that since we're copying outside shared buffers, a
- 	 * CHECKPOINT occurring during the copy has no way to flush the previously
- 	 * written data to disk (indeed it won't know the new rel even exists).  A
- 	 * crash later on would replay WAL from the checkpoint, therefore it
- 	 * wouldn't replay our earlier WAL entries. If we do not fsync those pages
- 	 * here, they might still not be on disk when the crash occurs.
- 	 */
- 	if (!istemp)
- 		smgrimmedsync(dst, forkNum);
- }
- 
- /*
   * ALTER TABLE ENABLE/DISABLE TRIGGER
   *
   * We just pass this off to trigger.c.
--- 7032,7037 ----
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 119,124 **** extern void simple_heap_update(Relation relation, ItemPointer otid,
--- 119,126 ----
  extern void heap_markpos(HeapScanDesc scan);
  extern void heap_restrpos(HeapScanDesc scan);
  
+ extern void heap_copy_relfilenode(SMgrRelation src, SMgrRelation dst,
+ 				   ForkNumber forkNum, bool istemp);
  extern void heap_sync(Relation relation);
  
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
*** a/src/include/access/htup.h
--- b/src/include/access/htup.h
***************
*** 18,23 ****
--- 18,24 ----
  #include "access/tupmacs.h"
  #include "storage/itemptr.h"
  #include "storage/relfilenode.h"
+ #include "storage/smgr.h"
  
  /*
   * MaxTupleAttributeNumber limits the number of (user) columns in a tuple.
***************
*** 583,588 **** typedef HeapTupleData *HeapTuple;
--- 584,590 ----
  #define XLOG_HEAP2_CLEAN		0x10
  /* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
  #define XLOG_HEAP2_CLEANUP_INFO 0x30
+ #define XLOG_HEAP2_COPYFILE		0x40
  
  /*
   * All what we need to find changed tuple
***************
*** 727,732 **** typedef struct xl_heap_freeze
--- 729,744 ----
  
  #define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + sizeof(TransactionId))
  
+ /* This is for logging the command to copy a whole relfilenode fork */
+ typedef struct xl_heap_copyrelfile
+ {
+ 	RelFileNode node;		/* node of source */
+ 	RelFileNode dstnode;	/* node of destination */
+ 	ForkNumber forkNum;
+ } xl_heap_copyrelfile;
+ 
+ #define SizeOfHeapCopyRelFile	(sizeof(xl_heap_copyrelfile))
+ 
  extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
  									   TransactionId *latestRemovedXid);
  
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 142,147 **** extern PGDLLIMPORT TimeLineID ThisTimeLineID;	/* current TLI */
--- 142,149 ----
   * see XLogCtl notes in xlog.c
   */
  extern bool InRecovery;
+ /* Is this more than just a crash recovery? */
+ extern bool InArchiveRecovery;
  
  /*
   * Like InRecovery, standbyState is only valid in the startup process.
