diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8079226..14149a9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1210,10 +1210,15 @@ static void
 BufferSync(int flags)
 {
 	int			buf_id;
-	int			num_to_scan;
+	int			buf_id_start;
 	int			num_to_write;
 	int			num_written;
 	int			mask = BM_DIRTY;
+	bool		target_chosen = false;
+	RelFileNode	target_rnode = {0, 0, 0};
+	ForkNumber	target_forknum = 0;
+	int			target_segno = 0;
+	int			target_bufs = 0;
 
 	/* Make sure we can handle the pin inside SyncOneBuffer */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
@@ -1275,10 +1280,10 @@ BufferSync(int flags)
 	 * Note that we don't read the buffer alloc count here --- that should be
 	 * left untouched till the next BgBufferSync() call.
 	 */
-	buf_id = StrategySyncStart(NULL, NULL);
-	num_to_scan = NBuffers;
+	buf_id_start = 0;
 	num_written = 0;
-	while (num_to_scan-- > 0)
+	target_chosen = false;
+	for (buf_id = 0; buf_id < NBuffers; buf_id++)
 	{
 		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
 
@@ -1294,7 +1299,12 @@ BufferSync(int flags)
 		 * write the buffer though we didn't need to.  It doesn't seem worth
 		 * guarding against this, though.
 		 */
-		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
+		/* the above reasoning applies to the target checks too */
+		if ((!target_chosen ||
+			 (RelFileNodeEquals(target_rnode, bufHdr->tag.rnode) &&
+			  target_forknum == bufHdr->tag.forkNum &&
+			  target_segno == bufHdr->tag.blockNum / RELSEG_SIZE)) &&
+			bufHdr->flags & BM_CHECKPOINT_NEEDED)
 		{
 			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
 			{
@@ -1320,11 +1330,34 @@ BufferSync(int flags)
 				 * Sleep to throttle our I/O rate.
 				 */
 				CheckpointWriteDelay(flags, (double) num_written / num_to_write);
+
+				/* Find other buffers belonging to this segment */
+				if (!target_chosen)
+				{
+					LockBufHdr(bufHdr);
+					target_rnode = bufHdr->tag.rnode;
+					target_forknum = bufHdr->tag.forkNum;
+					target_segno = bufHdr->tag.blockNum / RELSEG_SIZE;
+					UnlockBufHdr(bufHdr);
+					target_bufs = 0;
+					buf_id_start = buf_id;
+					target_chosen = true;
+				}
+				target_bufs++;
 			}
 		}
 
-		if (++buf_id >= NBuffers)
-			buf_id = 0;
+		if (buf_id == NBuffers - 1 && target_chosen)
+		{
+			if (log_checkpoints)
+				elog(LOG, "checkpoint sync: wrote %d buffers for target, syncing",
+					 target_bufs);
+
+			smgrsyncrel(target_rnode, target_forknum, target_segno);
+			target_chosen = false;
+			/* continue the scan where we left before we chose the target */
+			buf_id = buf_id_start;
+		}
 	}
 
 	/*
@@ -1825,10 +1858,11 @@ CheckPointBuffers(int flags)
 {
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
 	CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
+	smgrsyncbegin();
 	BufferSync(flags);
 	CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
-	smgrsync();
+	smgrsyncend();
 	CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
 }
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index e629181..4e4eb03 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -187,6 +187,7 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
 			 BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior);
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 		   MdfdVec *seg);
+static void mdsyncguts(PendingOperationEntry *entry, ForkNumber forknum, int segno);
 
 
 /*
@@ -235,7 +236,8 @@ SetForwardFsyncRequests(void)
 	/* Perform any pending fsyncs we may have queued up, then drop table */
 	if (pendingOpsTable)
 	{
-		mdsync();
+		mdsyncbegin();
+		mdsyncend();
 		hash_destroy(pendingOpsTable);
 	}
 	pendingOpsTable = NULL;
@@ -970,27 +972,18 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 	}
 }
 
+static bool mdsync_in_progress = false;
+/* Statistics on sync times */
+static 	int			processed = 0;
+static	uint64		longest = 0;
+static	uint64		total_elapsed = 0;
+
 /*
  *	mdsync() -- Sync previous writes to stable storage.
  */
 void
-mdsync(void)
+mdsyncbegin(void)
 {
-	static bool mdsync_in_progress = false;
-
-	HASH_SEQ_STATUS hstat;
-	PendingOperationEntry *entry;
-	int			absorb_counter;
-
-	/* Statistics on sync times */
-	int			processed = 0;
-	instr_time	sync_start,
-				sync_end,
-				sync_diff;
-	uint64		elapsed;
-	uint64		longest = 0;
-	uint64		total_elapsed = 0;
-
 	/*
 	 * This is only called during checkpoints, and checkpoints should only
 	 * occur in processes that have created a pendingOpsTable.
@@ -1036,6 +1029,8 @@ mdsync(void)
 	if (mdsync_in_progress)
 	{
 		/* prior try failed, so update any stale cycle_ctr values */
+		HASH_SEQ_STATUS hstat;
+		PendingOperationEntry *entry;
 		hash_seq_init(&hstat, pendingOpsTable);
 		while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
 		{
@@ -1048,6 +1043,151 @@ mdsync(void)
 
 	/* Set flag to detect failure if we don't reach the end of the loop */
 	mdsync_in_progress = true;
+}
+
+void
+mdsyncrel(RelFileNode rnode, ForkNumber forknum, int segno)
+{
+	PendingOperationEntry *entry;
+
+	/*
+	 * If fsync is off then we don't have to bother opening the
+	 * file at all.  (We delay checking until this point so that
+	 * changing fsync on the fly behaves sensibly.)
+	 */
+	if (!enableFsync)
+		return;
+
+	entry = hash_search(pendingOpsTable, &rnode, HASH_FIND, NULL);
+
+	if (entry)
+	{
+		if (bms_is_member(segno, entry->requests[forknum]))
+		{
+			mdsyncguts(entry, forknum, segno);
+			bms_del_member(entry->requests[forknum], segno);
+		}
+	}
+}
+
+static void
+mdsyncguts(PendingOperationEntry *entry, ForkNumber forknum, int segno)
+{
+	int			failures;
+	instr_time	sync_start,
+				sync_end,
+				sync_diff;
+	uint64		elapsed;
+
+	/*
+	 * The fsync table could contain requests to fsync segments
+	 * that have been deleted (unlinked) by the time we get to
+	 * them. Rather than just hoping an ENOENT (or EACCES on
+	 * Windows) error can be ignored, what we do on error is
+	 * absorb pending requests and then retry.	Since mdunlink()
+	 * queues a "cancel" message before actually unlinking, the
+	 * fsync request is guaranteed to be marked canceled after the
+	 * absorb if it really was this case. DROP DATABASE likewise
+	 * has to tell us to forget fsync requests before it starts
+	 * deletions.
+	 */
+	for (failures = 0;; failures++) /* loop exits at "break" */
+	{
+		SMgrRelation reln;
+		MdfdVec    *seg;
+		char	   *path;
+		int			save_errno;
+
+		/*
+		 * Find or create an smgr hash entry for this relation.
+		 * This may seem a bit unclean -- md calling smgr?	But
+		 * it's really the best solution.  It ensures that the
+		 * open file reference isn't permanently leaked if we get
+		 * an error here. (You may say "but an unreferenced
+		 * SMgrRelation is still a leak!" Not really, because the
+		 * only case in which a checkpoint is done by a process
+		 * that isn't about to shut down is in the checkpointer,
+		 * and it will periodically do smgrcloseall(). This fact
+		 * justifies our not closing the reln in the success path
+		 * either, which is a good thing since in non-checkpointer
+		 * cases we couldn't safely do that.)
+		 */
+		reln = smgropen(entry->rnode, InvalidBackendId);
+
+		/* Attempt to open and fsync the target segment */
+		seg = _mdfd_getseg(reln, forknum,
+						   (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
+						   false, EXTENSION_RETURN_NULL);
+
+		INSTR_TIME_SET_CURRENT(sync_start);
+
+		if (seg != NULL && FileSync(seg->mdfd_vfd) >= 0)
+		{
+			/* Success; update statistics about sync timing */
+			INSTR_TIME_SET_CURRENT(sync_end);
+			sync_diff = sync_end;
+			INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+			elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+			if (elapsed > longest)
+				longest = elapsed;
+			total_elapsed += elapsed;
+			processed++;
+			if (log_checkpoints)
+				elog(LOG, "checkpoint sync: number=%d file=%s time=%.3f msec",
+					 processed,
+					 FilePathName(seg->mdfd_vfd),
+					 (double) elapsed / 1000);
+
+			break;	/* out of retry loop */
+		}
+
+		/* Compute file name for use in message */
+		save_errno = errno;
+		path = _mdfd_segpath(reln, forknum, (BlockNumber) segno);
+		errno = save_errno;
+
+		/*
+		 * It is possible that the relation has been dropped or
+		 * truncated since the fsync request was entered.
+		 * Therefore, allow ENOENT, but only if we didn't fail
+		 * already on this file.  This applies both for
+		 * _mdfd_getseg() and for FileSync, since fd.c might have
+		 * closed the file behind our back.
+		 *
+		 * XXX is there any point in allowing more than one retry?
+		 * Don't see one at the moment, but easy to change the
+		 * test here if so.
+		 */
+		if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not fsync file \"%s\": %m",
+							path)));
+		else
+			ereport(DEBUG1,
+					(errcode_for_file_access(),
+					 errmsg("could not fsync file \"%s\" but retrying: %m",
+							path)));
+		pfree(path);
+
+		/*
+		 * Absorb incoming requests and check to see if a cancel
+		 * arrived for this relation fork.
+		 */
+		AbsorbFsyncRequests();
+		//absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
+
+		if (entry->canceled[forknum])
+			break;
+	}				/* end retry loop */
+}
+
+void
+mdsyncend(void)
+{
+	int			absorb_counter;
+	HASH_SEQ_STATUS hstat;
+	PendingOperationEntry *entry;
 
 	/* Now scan the hashtable for fsync requests to process */
 	absorb_counter = FSYNCS_PER_ABSORB;
@@ -1088,8 +1228,6 @@ mdsync(void)
 
 			while ((segno = bms_first_member(requests)) >= 0)
 			{
-				int			failures;
-
 				/*
 				 * If fsync is off then we don't have to bother opening the
 				 * file at all.  (We delay checking until this point so that
@@ -1111,109 +1249,7 @@ mdsync(void)
 					absorb_counter = FSYNCS_PER_ABSORB;
 				}
 
-				/*
-				 * The fsync table could contain requests to fsync segments
-				 * that have been deleted (unlinked) by the time we get to
-				 * them. Rather than just hoping an ENOENT (or EACCES on
-				 * Windows) error can be ignored, what we do on error is
-				 * absorb pending requests and then retry.	Since mdunlink()
-				 * queues a "cancel" message before actually unlinking, the
-				 * fsync request is guaranteed to be marked canceled after the
-				 * absorb if it really was this case. DROP DATABASE likewise
-				 * has to tell us to forget fsync requests before it starts
-				 * deletions.
-				 */
-				for (failures = 0;; failures++) /* loop exits at "break" */
-				{
-					SMgrRelation reln;
-					MdfdVec    *seg;
-					char	   *path;
-					int			save_errno;
-
-					/*
-					 * Find or create an smgr hash entry for this relation.
-					 * This may seem a bit unclean -- md calling smgr?	But
-					 * it's really the best solution.  It ensures that the
-					 * open file reference isn't permanently leaked if we get
-					 * an error here. (You may say "but an unreferenced
-					 * SMgrRelation is still a leak!" Not really, because the
-					 * only case in which a checkpoint is done by a process
-					 * that isn't about to shut down is in the checkpointer,
-					 * and it will periodically do smgrcloseall(). This fact
-					 * justifies our not closing the reln in the success path
-					 * either, which is a good thing since in non-checkpointer
-					 * cases we couldn't safely do that.)
-					 */
-					reln = smgropen(entry->rnode, InvalidBackendId);
-
-					/* Attempt to open and fsync the target segment */
-					seg = _mdfd_getseg(reln, forknum,
-							 (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
-									   false, EXTENSION_RETURN_NULL);
-
-					INSTR_TIME_SET_CURRENT(sync_start);
-
-					if (seg != NULL &&
-						FileSync(seg->mdfd_vfd) >= 0)
-					{
-						/* Success; update statistics about sync timing */
-						INSTR_TIME_SET_CURRENT(sync_end);
-						sync_diff = sync_end;
-						INSTR_TIME_SUBTRACT(sync_diff, sync_start);
-						elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
-						if (elapsed > longest)
-							longest = elapsed;
-						total_elapsed += elapsed;
-						processed++;
-						if (log_checkpoints)
-							elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f msec",
-								 processed,
-								 FilePathName(seg->mdfd_vfd),
-								 (double) elapsed / 1000);
-
-						break;	/* out of retry loop */
-					}
-
-					/* Compute file name for use in message */
-					save_errno = errno;
-					path = _mdfd_segpath(reln, forknum, (BlockNumber) segno);
-					errno = save_errno;
-
-					/*
-					 * It is possible that the relation has been dropped or
-					 * truncated since the fsync request was entered.
-					 * Therefore, allow ENOENT, but only if we didn't fail
-					 * already on this file.  This applies both for
-					 * _mdfd_getseg() and for FileSync, since fd.c might have
-					 * closed the file behind our back.
-					 *
-					 * XXX is there any point in allowing more than one retry?
-					 * Don't see one at the moment, but easy to change the
-					 * test here if so.
-					 */
-					if (!FILE_POSSIBLY_DELETED(errno) ||
-						failures > 0)
-						ereport(ERROR,
-								(errcode_for_file_access(),
-								 errmsg("could not fsync file \"%s\": %m",
-										path)));
-					else
-						ereport(DEBUG1,
-								(errcode_for_file_access(),
-						errmsg("could not fsync file \"%s\" but retrying: %m",
-							   path)));
-					pfree(path);
-
-					/*
-					 * Absorb incoming requests and check to see if a cancel
-					 * arrived for this relation fork.
-					 */
-					AbsorbFsyncRequests();
-					absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
-
-					if (entry->canceled[forknum])
-						break;
-				}				/* end retry loop */
+				mdsyncguts(entry, forknum, segno);
 			}
 			bms_free(requests);
 		}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index f7f1437..00b000d 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -58,7 +58,9 @@ typedef struct f_smgr
 											  BlockNumber nblocks);
 	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_pre_ckpt) (void);		/* may be NULL */
-	void		(*smgr_sync) (void);	/* may be NULL */
+	void		(*smgr_sync_begin) (void);	/* may be NULL */
+	void		(*smgr_sync_rel) (RelFileNode rnode, ForkNumber forknum, int segno);	/* may be NULL */
+	void		(*smgr_sync_end) (void);	/* may be NULL */
 	void		(*smgr_post_ckpt) (void);		/* may be NULL */
 } f_smgr;
 
@@ -67,7 +69,7 @@ static const f_smgr smgrsw[] = {
 	/* magnetic disk */
 	{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
 		mdprefetch, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
-		mdpreckpt, mdsync, mdpostckpt
+	 mdpreckpt, mdsyncbegin, mdsyncrel, mdsyncend, mdpostckpt
 	}
 };
 
@@ -705,17 +707,47 @@ smgrpreckpt(void)
 }
 
 /*
- *	smgrsync() -- Sync files to disk during checkpoint.
+ *	smgrsyncbegin() -- Sync files to disk during checkpoint.
  */
 void
-smgrsync(void)
+smgrsyncbegin(void)
 {
 	int			i;
 
 	for (i = 0; i < NSmgr; i++)
 	{
-		if (smgrsw[i].smgr_sync)
-			(*(smgrsw[i].smgr_sync)) ();
+		if (smgrsw[i].smgr_sync_begin)
+			(*(smgrsw[i].smgr_sync_begin)) ();
+	}
+}
+
+/*
+ *	smgrsyncrel() -- Sync files to disk during checkpoint.
+ */
+void
+smgrsyncrel(RelFileNode rnode, ForkNumber forknum, int segno)
+{
+	int			i;
+
+	for (i = 0; i < NSmgr; i++)
+	{
+		if (smgrsw[i].smgr_sync_rel)
+			(*(smgrsw[i].smgr_sync_rel)) (rnode, forknum, segno);
+	}
+}
+
+/*
+ *	smgrsyncend() -- Sync files to disk during checkpoint.
+ */
+void
+smgrsyncend(void)
+{
+	int			i;
+
+	for (i = 0; i < NSmgr; i++)
+	{
+		if (smgrsw[i].smgr_sync_end)
+			(*(smgrsw[i].smgr_sync_end)) ();
 	}
 }
 
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 98b6f13..4725221 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -100,7 +100,9 @@ extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
 			 BlockNumber nblocks);
 extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
 extern void smgrpreckpt(void);
-extern void smgrsync(void);
+extern void smgrsyncbegin(void);
+extern void smgrsyncrel(RelFileNode rnode, ForkNumber forknum, int segno);
+extern void smgrsyncend(void);
 extern void smgrpostckpt(void);
 extern void AtEOXact_SMgr(void);
 
@@ -126,7 +128,9 @@ extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
 		   BlockNumber nblocks);
 extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
 extern void mdpreckpt(void);
-extern void mdsync(void);
+extern void mdsyncbegin(void);
+extern void mdsyncrel(RelFileNode rnode, ForkNumber forknum, int segno);
+extern void mdsyncend(void);
 extern void mdpostckpt(void);
 
 extern void SetForwardFsyncRequests(void);