From aa3890a7364ee8f67f04a59c7a8a9cb65086b084 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jdavis@postgresql.org>
Date: Thu, 10 Nov 2022 14:46:30 -0800
Subject: [PATCH v1] Fix theoretical torn page hazard.

The original report was concerned with a possible inconsistency
between the heap and the visibility map, which I was unable to
confirm.

However, there does seem to be a torn page hazard when checksums are
enabled. It may be impossible in practice, because it would require a
page tear between the checksum and the flags, so I am considering this
a theoretical risk.

Also fix related comments, which are misleading.

Reported-by: Konstantin Knizhnik
Discussion: https://postgr.es/m/fed17dac-8cb8-4f5b-d462-1bb4908c029e@garret.ru
Backpatch-through: 11
---
 src/backend/access/heap/heapam.c        | 28 ++++++-------------------
 src/backend/access/heap/visibilitymap.c | 13 ++++++++++--
 2 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 12be87efed..5c8cdfb9b2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8823,21 +8823,17 @@ heap_xlog_visible(XLogReaderState *record)
 		/*
 		 * We don't bump the LSN of the heap page when setting the visibility
 		 * map bit (unless checksums or wal_hint_bits is enabled, in which
-		 * case we must), because that would generate an unworkable volume of
-		 * full-page writes.  This exposes us to torn page hazards, but since
+		 * case we must). This exposes us to torn page hazards, but since
 		 * we're not inspecting the existing page contents in any way, we
 		 * don't care.
-		 *
-		 * However, all operations that clear the visibility map bit *do* bump
-		 * the LSN, and those operations will only be replayed if the XLOG LSN
-		 * follows the page LSN.  Thus, if the page LSN has advanced past our
-		 * XLOG record's LSN, we mustn't mark the page all-visible, because
-		 * the subsequent update won't be replayed to clear the flag.
 		 */
 		page = BufferGetPage(buffer);
 
 		PageSetAllVisible(page);
 
+		if (XLogHintBitIsNeeded())
+			PageSetLSN(page, lsn);
+
 		MarkBufferDirty(buffer);
 	}
 	else if (action == BLK_RESTORED)
@@ -8901,20 +8897,8 @@ heap_xlog_visible(XLogReaderState *record)
 		reln = CreateFakeRelcacheEntry(rlocator);
 		visibilitymap_pin(reln, blkno, &vmbuffer);
 
-		/*
-		 * Don't set the bit if replay has already passed this point.
-		 *
-		 * It might be safe to do this unconditionally; if replay has passed
-		 * this point, we'll replay at least as far this time as we did
-		 * before, and if this bit needs to be cleared, the record responsible
-		 * for doing so should be again replayed, and clear it.  For right
-		 * now, out of an abundance of conservatism, we use the same test here
-		 * we did for the heap page.  If this results in a dropped bit, no
-		 * real harm is done; and the next VACUUM will fix it.
-		 */
-		if (lsn > PageGetLSN(vmpage))
-			visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
-							  xlrec->cutoff_xid, xlrec->flags);
+		visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
+						  xlrec->cutoff_xid, xlrec->flags);
 
 		ReleaseBuffer(vmbuffer);
 		FreeFakeRelcacheEntry(reln);
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index d62761728b..e029ae9ae2 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -287,8 +287,17 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 										  cutoff_xid, flags);
 
 				/*
-				 * If data checksums are enabled (or wal_log_hints=on), we
-				 * need to protect the heap page from being torn.
+				 * If XLogHintBitIsNeeded(), we protect against torn pages
+				 * like usual: the call to log_heap_visible() has inserted a
+				 * WAL record which includes an image of the heap page if
+				 * necessary, and we update the heap page's LSN below.
+				 *
+				 * If !XLogHintBitIsNeeded(), a torn page is inconsequential
+				 * if the only change was to a hint bit. The above call to
+				 * log_heap_visible() has specified REGBUF_NO_IMAGE for the
+				 * heap buffer, so we must not update the heap page's LSN
+				 * because that would signal to the next modification that a
+				 * full page image has already been taken.
 				 */
 				if (XLogHintBitIsNeeded())
 				{
-- 
2.34.1

