diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 4df69c2..f58ac3e 100644
*** a/src/backend/postmaster/bgwriter.c
--- b/src/backend/postmaster/bgwriter.c
*************** static bool am_bg_writer = false;
*** 168,173 ****
--- 168,175 ----
  
  static bool ckpt_active = false;
  
+ static int checkpoint_flags = 0;
+ 
  /* these values are valid when ckpt_active is true: */
  static pg_time_t ckpt_start_time;
  static XLogRecPtr ckpt_start_recptr;
*************** static pg_time_t last_xlog_switch_time;
*** 180,186 ****
  
  static void CheckArchiveTimeout(void);
  static void BgWriterNap(void);
! static bool IsCheckpointOnSchedule(double progress);
  static bool ImmediateCheckpointRequested(void);
  static bool CompactBgwriterRequestQueue(void);
  
--- 182,188 ----
  
  static void CheckArchiveTimeout(void);
  static void BgWriterNap(void);
! static bool IsCheckpointOnSchedule(double progress,double target);
  static bool ImmediateCheckpointRequested(void);
  static bool CompactBgwriterRequestQueue(void);
  
*************** CheckpointWriteDelay(int flags, double p
*** 691,696 ****
--- 693,701 ----
  	if (!am_bg_writer)
  		return;
  
+ 	/* Remember the flags so CheckpointSyncDelay() can consult them later */
+ 	checkpoint_flags=flags;
+ 
  	/*
  	 * Perform the usual bgwriter duties and take a nap, unless we're behind
  	 * schedule, in which case we just try to catch up as quickly as possible.
*************** CheckpointWriteDelay(int flags, double p
*** 698,704 ****
  	if (!(flags & CHECKPOINT_IMMEDIATE) &&
  		!shutdown_requested &&
  		!ImmediateCheckpointRequested() &&
! 		IsCheckpointOnSchedule(progress))
  	{
  		if (got_SIGHUP)
  		{
--- 703,709 ----
  	if (!(flags & CHECKPOINT_IMMEDIATE) &&
  		!shutdown_requested &&
  		!ImmediateCheckpointRequested() &&
! 		IsCheckpointOnSchedule(progress,CheckPointCompletionTarget))
  	{
  		if (got_SIGHUP)
  		{
*************** CheckpointWriteDelay(int flags, double p
*** 726,731 ****
--- 731,799 ----
  }
  
  /*
+  * CheckpointSyncDelay -- yield control to bgwriter during a checkpoint
+  *
+  * mdsync() calls this as it works through its fsync requests, passing how
+  * many are finished and the estimated goal (nonzero).  We always absorb new
+  * requests; the bgwriter also does its usual work and naps while on schedule.
+  */
+ void
+ CheckpointSyncDelay(int finished,int goal)
+ {
+ 	int flags = checkpoint_flags;	/* saved by CheckpointWriteDelay() */
+ 	int nap_count = 0;
+ 	double progress;
+ 	double CheckPointSyncTarget = 0.8;	/* XXX hard-coded schedule target */
+ 
+ 	/*
+ 	 * mdsync() no longer absorbs fsync requests itself, so do it on every
+ 	 * call -- even when we're immediate or behind schedule and won't nap.
+ 	 */
+ 	AbsorbFsyncRequests();
+ 
+ 	/* Do nothing if checkpoint is being executed by non-bgwriter process */
+ 	if (!am_bg_writer)
+ 		return;
+ 
+ 	/* Reset IsCheckpointOnSchedule()'s cache, and clamp to the goal */
+ 	ckpt_cached_elapsed = 0;
+ 	if (finished > goal)
+ 		finished = goal;
+ 
+ 	/*
+ 	 * Base our progress on the assumption that the write took
+ 	 * checkpoint_completion_target worth of time, and that sync
+ 	 * progress is advancing from beyond that point.
+ 	 */
+ 	progress = CheckPointCompletionTarget +
+ 		(1.0 - CheckPointCompletionTarget) * finished / goal;
+ 
+ 	/* Perform the usual bgwriter duties and nap while on schedule */
+ 	elog(DEBUG2,"checkpoint sync: considering a nap after progress=%.1f",progress);
+ 	while (!(flags & CHECKPOINT_IMMEDIATE) &&
+ 			!shutdown_requested &&
+ 			!ImmediateCheckpointRequested() &&
+ 			(IsCheckpointOnSchedule(progress,CheckPointSyncTarget)))
+ 	{
+ 		if (got_SIGHUP)
+ 		{
+ 			got_SIGHUP = false;
+ 			ProcessConfigFile(PGC_SIGHUP);
+ 		}
+ 
+ 		elog(DEBUG2,"checkpoint sync: nap count=%d",nap_count);
+ 		nap_count++;
+ 
+ 		BgBufferSync();
+ 		CheckArchiveTimeout();
+ 		BgWriterNap();
+ 		AbsorbFsyncRequests();	/* absorb again after the nap */
+ 	}
+ 	if (nap_count > 0)
+ 		elog(DEBUG1,"checkpoint sync: naps=%d",nap_count);
+ }
+ 
+ /*
   * IsCheckpointOnSchedule -- are we on schedule to finish this checkpoint
   *		 in time?
   *
*************** CheckpointWriteDelay(int flags, double p
*** 734,740 ****
   * than the elapsed time/segments.
   */
  static bool
! IsCheckpointOnSchedule(double progress)
  {
  	XLogRecPtr	recptr;
  	struct timeval now;
--- 802,808 ----
   * than the elapsed time/segments.
   */
  static bool
! IsCheckpointOnSchedule(double progress,double target)
  {
  	XLogRecPtr	recptr;
  	struct timeval now;
*************** IsCheckpointOnSchedule(double progress)
*** 743,750 ****
  
  	Assert(ckpt_active);
  
! 	/* Scale progress according to checkpoint_completion_target. */
! 	progress *= CheckPointCompletionTarget;
  
  	/*
  	 * Check against the cached value first. Only do the more expensive
--- 811,820 ----
  
  	Assert(ckpt_active);
  
! 	/* Scale progress according to given target. */
! 	progress *= target;
! 
! 	elog(DEBUG2,"checkpoint schedule check: scaled progress=%.1f target=%.1f",progress,target);
  
  	/*
  	 * Check against the cached value first. Only do the more expensive
*************** IsCheckpointOnSchedule(double progress)
*** 773,778 ****
--- 843,850 ----
  			 ((double) recptr.xrecoff - (double) ckpt_start_recptr.xrecoff) / XLogSegSize) /
  			CheckPointSegments;
  
+ 		elog(DEBUG2,"checkpoint schedule: elapsed xlogs=%.1f",elapsed_xlogs);
+ 
  		if (progress < elapsed_xlogs)
  		{
  			ckpt_cached_elapsed = elapsed_xlogs;
*************** IsCheckpointOnSchedule(double progress)
*** 787,792 ****
--- 859,866 ----
  	elapsed_time = ((double) ((pg_time_t) now.tv_sec - ckpt_start_time) +
  					now.tv_usec / 1000000.0) / CheckPointTimeout;
  
+ 	elog(DEBUG2,"checkpoint schedule: elapsed time=%.1f",elapsed_time);
+ 
  	if (progress < elapsed_time)
  	{
  		ckpt_cached_elapsed = elapsed_time;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 9d585b6..f294f6f 100644
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 31,39 ****
  #include "pg_trace.h"
  
  
- /* interval for calling AbsorbFsyncRequests in mdsync */
- #define FSYNCS_PER_ABSORB		10
- 
  /*
   * Special values for the segno arg to RememberFsyncRequest.
   *
--- 31,36 ----
*************** mdsync(void)
*** 932,938 ****
  
  	HASH_SEQ_STATUS hstat;
  	PendingOperationEntry *entry;
- 	int			absorb_counter;
  
  	/* Statistics on sync times */
  	int			processed = 0;
--- 929,934 ----
*************** mdsync(void)
*** 943,948 ****
--- 939,948 ----
  	uint64		longest = 0;
  	uint64		total_elapsed = 0;
  
+ 	/* Sync spreading counters */
+ 	int			sync_segments = 0;
+ 	int			current_segment = 0;
+ 	
  	/*
  	 * This is only called during checkpoints, and checkpoints should only
  	 * occur in processes that have created a pendingOpsTable.
*************** mdsync(void)
*** 1001,1008 ****
  	/* Set flag to detect failure if we don't reach the end of the loop */
  	mdsync_in_progress = true;
  
  	/* Now scan the hashtable for fsync requests to process */
- 	absorb_counter = FSYNCS_PER_ABSORB;
  	hash_seq_init(&hstat, pendingOpsTable);
  	while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
  	{
--- 1001,1033 ----
  	/* Set flag to detect failure if we don't reach the end of the loop */
  	mdsync_in_progress = true;
  
+ 	/*
+ 	 * For spread sync timing purposes, scan the hashtable to count its
+ 	 * entries.  Using hash_get_num_entries instead would require a stronger
+ 	 * lock than we want to have at this point, and we don't want to count
+ 	 * requests destined for the next cycle anyway.
+ 	 *
+ 	 * XXX Should we skip this if there is no sync spreading, or if
+ 	 *     fsync is off?
+ 	 */
+ 	hash_seq_init(&hstat, pendingOpsTable);
+ 	while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
+ 	{
+ 		if (entry->cycle_ctr == mdsync_cycle_ctr)
+ 			continue;
+ 		sync_segments++;
+ 	}
+ 
+ 	/* 
+ 	 * In the unexpected situation where the above estimate says there
+ 	 * is nothing to sync, avoid division by zero errors in the
+ 	 * progress computation below.
+ 	 */
+ 	if (sync_segments == 0)
+ 		sync_segments = 1;
+ 	elog(DEBUG1, "checkpoint sync:  estimated segments=%d",sync_segments);
+ 
  	/* Now scan the hashtable for fsync requests to process */
  	hash_seq_init(&hstat, pendingOpsTable);
  	while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
  	{
*************** mdsync(void)
*** 1027,1043 ****
  			int			failures;
  
  			/*
! 			 * If in bgwriter, we want to absorb pending requests every so
! 			 * often to prevent overflow of the fsync request queue.  It is
! 			 * unspecified whether newly-added entries will be visited by
! 			 * hash_seq_search, but we don't care since we don't need to
! 			 * process them anyway.
  			 */
! 			if (--absorb_counter <= 0)
! 			{
! 				AbsorbFsyncRequests();
! 				absorb_counter = FSYNCS_PER_ABSORB;
! 			}
  
  			/*
  			 * The fsync table could contain requests to fsync segments that
--- 1052,1060 ----
  			int			failures;
  
  			/*
! 			 * If in bgwriter, perform normal duties.
  			 */
! 			CheckpointSyncDelay(current_segment,sync_segments);
  
  			/*
  			 * The fsync table could contain requests to fsync segments that
*************** mdsync(void)
*** 1131,1140 ****
  				pfree(path);
  
  				/*
! 				 * Absorb incoming requests and check to see if canceled.
  				 */
! 				AbsorbFsyncRequests();
! 				absorb_counter = FSYNCS_PER_ABSORB;		/* might as well... */
  
  				if (entry->canceled)
  					break;
--- 1148,1156 ----
  				pfree(path);
  
  				/*
! 				 * If in bgwriter, perform normal duties.
  				 */
! 				CheckpointSyncDelay(current_segment,sync_segments);
  
  				if (entry->canceled)
  					break;
*************** mdsync(void)
*** 1149,1154 ****
--- 1165,1172 ----
  		if (hash_search(pendingOpsTable, &entry->tag,
  						HASH_REMOVE, NULL) == NULL)
  			elog(ERROR, "pendingOpsTable corrupted");
+ 
+ 		current_segment++;
  	}							/* end loop over hashtable entries */
  
  	/* Return sync performance metrics for report at checkpoint end */
diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h
index eaf2206..5da0aa2 100644
*** a/src/include/postmaster/bgwriter.h
--- b/src/include/postmaster/bgwriter.h
*************** extern void BackgroundWriterMain(void);
*** 26,31 ****
--- 26,32 ----
  
  extern void RequestCheckpoint(int flags);
  extern void CheckpointWriteDelay(int flags, double progress);
+ extern void CheckpointSyncDelay(int finished,int goal);
  
  extern bool ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
  					BlockNumber segno);
