diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5663711..e8f8781 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -209,11 +209,12 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
 		CommitTransactionCommand();
 	}
 
-	/* Turn vacuum cost accounting on or off */
+	/* Adjust vacuum cost accounting state and update VacuumActive flag */
 	PG_TRY();
 	{
 		ListCell   *cur;
 
+		VacuumActive = true;
 		VacuumCostActive = (VacuumCostDelay > 0);
 		VacuumCostBalance = 0;
 
@@ -254,8 +255,9 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
 	}
 	PG_CATCH();
 	{
-		/* Make sure cost accounting is turned off after error */
+		/* Make sure vacuum state variables are fixed up on error */
 		VacuumCostActive = false;
+		VacuumActive = false;
 		PG_RE_THROW();
 	}
 	PG_END_TRY();
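
The PG_CATCH() block above resets VacuumActive on the error path; on the normal
path the flag would likewise be cleared once the relation list has been
processed, next to the existing VacuumCostActive reset after PG_END_TRY().  A
minimal sketch of that spot, assuming the usual structure of vacuum() (this
fragment is illustrative, not part of the hunks shown):

	PG_END_TRY();

	/* Turn off vacuum cost accounting, and clear the vacuum-active flag */
	VacuumCostActive = false;
	VacuumActive = false;
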
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1f89e52..1146dee 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -81,6 +81,10 @@ static bool IsForInput;
 /* local state for LockBufferForCleanup */
 static volatile BufferDesc *PinCountWaitBuf = NULL;
 
+/* local state for BufferAlloc */
+static int hint_bit_write_allowance;
+static int buffer_allocation_count;
+
 
 static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence,
 				  ForkNumber forkNum, BlockNumber blockNum,
@@ -578,6 +582,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	for (;;)
 	{
 		bool		lock_held;
+		bool		write_buffer = false;
 
 		/*
 		 * Select a victim buffer.	The buffer is returned with its header
@@ -600,13 +605,41 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			LWLockRelease(BufFreelistLock);
 
 		/*
+		 * If the buffer is dirty, we definitely need to write it out.  If
+		 * it's not dirty but has hint bits set, we can choose whether or not
+		 * to spend the time writing it.  We allow up to 5% of otherwise-clean
+		 * pages to be written out because of hint bit changes, since getting
+		 * hint bits down to disk improves the performance of future scans;
+		 * but we cap the fraction of such writes so that a scan of a table
+		 * with many hint bits unset doesn't suffer a dramatic performance
+		 * drop-off from the extra writes.
+		 *
+		 * It's also important that a large sequential scan which forces out
+		 * hint bit changes doesn't turn into a lot of random I/O.  To avoid
+		 * that, we periodically grant ourselves a chunk of hint bit writes,
+		 * spend the chunk on consecutive evictions as needed, and then write
+		 * nothing further until the next chunk is granted.
+		 */
+		if ((buffer_allocation_count++ % 2000) == 0)
+			hint_bit_write_allowance = 100;
+		if ((oldFlags & BM_DIRTY) != 0)
+			write_buffer = true;
+		else if ((oldFlags & BM_HINT_BITS) != 0
+				&& hint_bit_write_allowance > 0)
+		{
+			write_buffer = true;
+			--hint_bit_write_allowance;
+		}
+
+		/*
 		 * If the buffer was dirty, try to write it out.  There is a race
 		 * condition here, in that someone might dirty it after we released it
 		 * above, or even while we are writing it out (since our share-lock
-		 * won't prevent hint-bit updates).  We will recheck the dirty bit
+		 * won't prevent hint bit updates, and vacuum marks pages dirty even
+		 * when only hint bit changes are made).  We will recheck the dirty bit
 		 * after re-locking the buffer header.
 		 */
-		if (oldFlags & BM_DIRTY)
+		if (write_buffer)
 		{
 			/*
 			 * We need a share-lock on the buffer contents to write it out
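
For illustration, the rationing policy above can be read as a small standalone
helper.  This is only a sketch restating the same logic (should_write_victim is
a hypothetical name, not part of the patch), using the counters and constants
introduced here:

	static int	hint_bit_write_allowance;
	static int	buffer_allocation_count;

	static bool
	should_write_victim(bool is_dirty, bool has_hint_bits)
	{
		/* every 2000 allocations, grant a fresh budget of 100 hint-bit-only writes */
		if ((buffer_allocation_count++ % 2000) == 0)
			hint_bit_write_allowance = 100;

		if (is_dirty)
			return true;		/* dirty pages must always be written */

		if (has_hint_bits && hint_bit_write_allowance > 0)
		{
			--hint_bit_write_allowance;		/* spend one unit of the budget */
			return true;		/* at most ~5% of evictions take this path */
		}

		return false;			/* evict the page without writing it */
	}

Spending the whole budget on consecutive evictions, rather than spreading it
out as, say, one write in every twenty, keeps the extra hint-bit writes largely
sequential rather than random when a big scan is streaming pages through the
buffer pool.
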
@@ -801,7 +834,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 * 1 so that the buffer can survive one clock-sweep pass.)
 	 */
 	buf->tag = newTag;
-	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
+	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_HINT_BITS | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
 	if (relpersistence == RELPERSISTENCE_PERMANENT)
 		buf->flags |= BM_TAG_VALID | BM_PERMANENT;
 	else
@@ -1612,7 +1645,16 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 		return result;
 	}
 
-	if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
+	/*
+	 * We can get here either because we're checkpointing, or from the
+	 * background writer cleaning scan.  In the latter case, we want to
+	 * write out buffers with hint bit changes so that such changes eventually
+	 * make their way to disk.  In the former case we normally won't get called
+	 * if only hint bit changes have occurred, but if it somehow happens we'll
+	 * just write the page anyway.
+	 */
+	if (!(bufHdr->flags & BM_VALID)
+		|| !(bufHdr->flags & (BM_DIRTY | BM_HINT_BITS)))
 	{
 		/* It's clean, so nothing to do */
 		UnlockBufHdr(bufHdr);
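
The reason a checkpoint normally doesn't reach this test for a hint-bit-only
buffer is that the checkpoint scan selects its candidates on BM_DIRTY alone;
roughly, in the existing BufferSync() code (unchanged by this patch):

	if (bufHdr->flags & BM_DIRTY)
		bufHdr->flags |= BM_CHECKPOINT_NEEDED;

The background writer's cleaning scan, by contrast, calls SyncOneBuffer()
directly for the buffers it sweeps past, so the BM_HINT_BITS test added above
is what lets hint-bit-only pages trickle out to disk over time.
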
@@ -2065,7 +2107,10 @@ PrintPinnedBufs(void)
  *
  *		This function writes all dirty pages of a relation out to disk
  *		(or more accurately, out to kernel disk buffers), ensuring that the
- *		kernel has an up-to-date view of the relation.
+ *		kernel has an up-to-date view of the relation.  We flush pages with
+ *		only hint bit changes, too: this probably isn't critical, but since
+ *		this only happens during relatively heavyweight operations, it
+ *		shouldn't cost much.
  *
  *		Generally, the caller should be holding AccessExclusiveLock on the
  *		target relation to ensure that no other backend is busy dirtying
@@ -2094,7 +2139,8 @@ FlushRelationBuffers(Relation rel)
 		{
 			bufHdr = &LocalBufferDescriptors[i];
 			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
-				(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+				(bufHdr->flags & BM_VALID) &&
+				(bufHdr->flags & (BM_DIRTY | BM_HINT_BITS)))
 			{
 				ErrorContextCallback errcontext;
 
@@ -2128,7 +2174,8 @@ FlushRelationBuffers(Relation rel)
 		bufHdr = &BufferDescriptors[i];
 		LockBufHdr(bufHdr);
 		if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
-			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+			(bufHdr->flags & BM_VALID) &&
+			(bufHdr->flags & (BM_DIRTY | BM_HINT_BITS)))
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -2146,7 +2193,10 @@ FlushRelationBuffers(Relation rel)
  *
  *		This function writes all dirty pages of a database out to disk
  *		(or more accurately, out to kernel disk buffers), ensuring that the
- *		kernel has an up-to-date view of the database.
+ *		kernel has an up-to-date view of the database.  We flush pages with
+ *		only hint bit changes, too: this probably isn't critical, but since
+ *		this only happens during relatively heavyweight operations, it
+ *		shouldn't cost much.
  *
  *		Generally, the caller should be holding an appropriate lock to ensure
  *		no other backend is active in the target database; otherwise more
@@ -2170,7 +2220,8 @@ FlushDatabaseBuffers(Oid dbid)
 		bufHdr = &BufferDescriptors[i];
 		LockBufHdr(bufHdr);
 		if (bufHdr->tag.rnode.dbNode == dbid &&
-			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
+			(bufHdr->flags & BM_VALID) &&
+			(bufHdr->flags & (BM_DIRTY | BM_HINT_BITS)))
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
@@ -2248,15 +2299,25 @@ IncrBufferRefCount(Buffer buffer)
 /*
  * SetBufferCommitInfoNeedsSave
  *
- *	Mark a buffer dirty when we have updated tuple commit-status bits in it.
- *
- * This is essentially the same as MarkBufferDirty, except that the caller
- * might have only share-lock instead of exclusive-lock on the buffer's
- * content lock.  We preserve the distinction mainly as a way of documenting
- * that the caller has not made a critical data change --- the status-bit
- * update could be redone by someone else just as easily.  Therefore, no WAL
- * log record need be generated, whereas calls to MarkBufferDirty really ought
- * to be associated with a WAL-entry-creating action.
+ * Mark a buffer as containing hint-bit changes.  For local buffers, this
+ * is no different from MarkBufferDirty, except that the caller might have
+ * only share-lock instead of exclusive-lock on the buffer content.  But for
+ * shared buffers, we set BM_HINT_BITS rather than BM_DIRTY.  The background
+ * writer's cleaning scan will still write such buffers to speed up future
+ * access to the tuples stored in those blocks, but checkpoints skip them,
+ * and writes forced by buffer allocations are rationed (see BufferAlloc),
+ * since these changes are non-critical.  This allows some hint bit changes
+ * to reach disk without (hopefully) consuming too much I/O bandwidth.
+ *
+ * As a special exception, when VACUUM is active, we mark hint-bit updated
+ * buffers as dirty, so that VACUUM can be used to get all the hint bits set
+ * without fear of losing some of those updates before they get flushed out
+ * to disk.
+ *
+ * Note that a status-bit update could be redone by someone else just as
+ * easily.  Therefore, no WAL log record need be generated, whereas calls to
+ * MarkBufferDirty really ought to be associated with a WAL-entry-creating
+ * action.
  */
 void
 SetBufferCommitInfoNeedsSave(Buffer buffer)
@@ -2272,6 +2333,12 @@ SetBufferCommitInfoNeedsSave(Buffer buffer)
 		return;
 	}
 
+	if (VacuumActive)
+	{
+		MarkBufferDirty(buffer);
+		return;
+	}
+
 	bufHdr = &BufferDescriptors[buffer - 1];
 
 	Assert(PrivateRefCount[buffer - 1] > 0);
@@ -2281,20 +2348,20 @@ SetBufferCommitInfoNeedsSave(Buffer buffer)
 	/*
 	 * This routine might get called many times on the same page, if we are
 	 * making the first scan after commit of an xact that added/deleted many
-	 * tuples.	So, be as quick as we can if the buffer is already dirty.  We
-	 * do this by not acquiring spinlock if it looks like the status bits are
-	 * already OK.	(Note it is okay if someone else clears BM_JUST_DIRTIED
-	 * immediately after we look, because the buffer content update is already
-	 * done and will be reflected in the I/O.)
+	 * tuples.	So, be as quick as we can if the buffer is already marked.  We
+	 * do this by not acquiring spinlock if it looks like the relevant bits are
+	 * already set.  (If memory-ordering effects cause us to see a stale value,
+	 * the worst thing that can happen is we fail to set BM_HINT_BITS.  But
+	 * that won't result in data corruption, since hint bits are not critical.)
 	 */
-	if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
-		(BM_DIRTY | BM_JUST_DIRTIED))
+	if ((bufHdr->flags & (BM_HINT_BITS | BM_JUST_DIRTIED)) !=
+		(BM_HINT_BITS | BM_JUST_DIRTIED))
 	{
 		LockBufHdr(bufHdr);
 		Assert(bufHdr->refcount > 0);
-		if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
+		if (!(bufHdr->flags & (BM_HINT_BITS | BM_DIRTY)) && VacuumCostActive)
 			VacuumCostBalance += VacuumCostPageDirty;
-		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+		bufHdr->flags |= (BM_HINT_BITS | BM_JUST_DIRTIED);
 		UnlockBufHdr(bufHdr);
 	}
 }
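
Putting the pieces together, the decision SetBufferCommitInfoNeedsSave() makes
after this patch is, in outline (a simplified sketch; the flag manipulation is
the code shown above, collapsed here into a comment):

	if (BufferIsLocal(buffer))
	{
		MarkLocalBufferDirty(buffer);	/* local buffers: treated as truly dirty */
		return;
	}
	if (VacuumActive)
	{
		MarkBufferDirty(buffer);		/* under VACUUM: promote to BM_DIRTY */
		return;
	}
	/*
	 * Otherwise set BM_HINT_BITS | BM_JUST_DIRTIED, taking the spinlock only
	 * when the bits don't already appear to be set, and charging
	 * VacuumCostPageDirty if cost accounting is active and the buffer wasn't
	 * previously marked.
	 */
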
@@ -2583,8 +2650,8 @@ WaitIO(volatile BufferDesc *buf)
  * io_in_progress lock until he's done.
  *
  * Input operations are only attempted on buffers that are not BM_VALID,
- * and output operations only on buffers that are BM_VALID and BM_DIRTY,
- * so we can always tell if the work is already done.
+ * and output operations only on buffers that are BM_VALID and either BM_DIRTY
+ * or BM_HINT_BITS, so we can always tell if the work is already done.
  *
  * Returns TRUE if we successfully marked the buffer as I/O busy,
  * FALSE if someone else already did the work.
@@ -2620,7 +2687,8 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 
 	/* Once we get here, there is definitely no I/O active on this buffer */
 
-	if (forInput ? (buf->flags & BM_VALID) : !(buf->flags & BM_DIRTY))
+	if (forInput ? (buf->flags & BM_VALID) :
+		!(buf->flags & (BM_DIRTY | BM_HINT_BITS)))
 	{
 		/* someone else already did the I/O */
 		UnlockBufHdr(buf);
@@ -2666,7 +2734,7 @@ TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
 	Assert(buf->flags & BM_IO_IN_PROGRESS);
 	buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
 	if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
-		buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
+		buf->flags &= ~(BM_DIRTY | BM_HINT_BITS | BM_CHECKPOINT_NEEDED);
 	buf->flags |= set_flag_bits;
 
 	UnlockBufHdr(buf);
@@ -2704,7 +2772,7 @@ AbortBufferIO(void)
 		Assert(buf->flags & BM_IO_IN_PROGRESS);
 		if (IsForInput)
 		{
-			Assert(!(buf->flags & BM_DIRTY));
+			Assert(!(buf->flags & (BM_DIRTY | BM_HINT_BITS)));
 			/* We'd better not think buffer is valid yet */
 			Assert(!(buf->flags & BM_VALID));
 			UnlockBufHdr(buf);
@@ -2714,7 +2782,7 @@ AbortBufferIO(void)
 			BufFlags	sv_flags;
 
 			sv_flags = buf->flags;
-			Assert(sv_flags & BM_DIRTY);
+			Assert(sv_flags & (BM_DIRTY | BM_HINT_BITS));
 			UnlockBufHdr(buf);
 			/* Issue notice if this is not the first failure... */
 			if (sv_flags & BM_IO_ERROR)
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 0a01ed5..bf9f506 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -248,7 +248,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	 * it's all ours now.
 	 */
 	bufHdr->tag = newTag;
-	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
+	bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_HINT_BITS | BM_IO_ERROR);
 	bufHdr->flags |= BM_TAG_VALID;
 	bufHdr->usage_count = 1;
 
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 984ffd0..26b08ed 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -116,6 +116,7 @@ int			VacuumCostDelay = 0;
 
 int			VacuumCostBalance = 0;		/* working state for vacuum */
 bool		VacuumCostActive = false;
+bool		VacuumActive = false;
 
 int			GinFuzzySearchLimit = 0;
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index aa8cce5..4d7fbe5 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -231,6 +231,7 @@ extern int	VacuumCostDelay;
 
 extern int	VacuumCostBalance;
 extern bool VacuumCostActive;
+extern bool VacuumActive;
 
 
 /* in tcop/postgres.c */
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0652bdf..bff02d8 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -38,6 +38,7 @@
 #define BM_PIN_COUNT_WAITER		(1 << 6)		/* have waiter for sole pin */
 #define BM_CHECKPOINT_NEEDED	(1 << 7)		/* must write for checkpoint */
 #define BM_PERMANENT			(1 << 8)		/* permanent relation (not unlogged) */
+#define BM_HINT_BITS			(1 << 9)		/* hint bits changed */
 
 typedef bits16 BufFlags;
 
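
As a usage note, the flush-path tests changed throughout bufmgr.c above all ask
the same question of these flags: does the buffer hold anything that should be
written before it can be treated as clean?  A hypothetical helper (not part of
the patch) makes the pattern explicit:

	/* true if the buffer has pending changes, critical (BM_DIRTY) or not (BM_HINT_BITS) */
	#define BUFFER_NEEDS_WRITE(flags)	(((flags) & (BM_DIRTY | BM_HINT_BITS)) != 0)

Checkpoint selection in BufferSync() deliberately keeps testing BM_DIRTY alone,
so hint-bit-only pages never add to checkpoint I/O.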
