From f0c2d489dd8f2268803f4d2cc8642e29cb0d3576 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 28 Sep 2023 11:19:06 -0700
Subject: [PATCH v4] Assert that buffers are marked dirty before
 XLogRegisterBuffer().

Enforce the rule from transam/README in XLogRegisterBuffer(), and
update callers to follow the rule.

Hash indexes sometimes register clean pages as a part of the locking
protocol, so provide a REGBUF_NO_CHANGE flag to support that use.

Discussion: https://postgr.es/m/c84114f8-c7f1-5b57-f85a-3adc31e1a904@iki.fi
---
 src/backend/access/gin/ginbtree.c       | 14 +++---
 src/backend/access/gin/gindatapage.c    |  6 +++
 src/backend/access/gin/ginentrypage.c   |  3 ++
 src/backend/access/gin/ginfast.c        |  5 ++-
 src/backend/access/hash/hash.c          | 11 +++--
 src/backend/access/hash/hashovfl.c      | 21 ++++++---
 src/backend/access/transam/xloginsert.c | 16 ++++++-
 src/backend/storage/buffer/bufmgr.c     | 59 +++++++++++++++++++++++++
 src/include/access/xloginsert.h         |  1 +
 src/include/storage/bufmgr.h            |  2 +
 10 files changed, 118 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 06e97a7fbb..fc694b40f1 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -387,24 +387,22 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
 		START_CRIT_SECTION();
 
 		if (RelationNeedsWAL(btree->index) && !btree->isBuild)
-		{
 			XLogBeginInsert();
-			XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
-			if (BufferIsValid(childbuf))
-				XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
-		}
 
-		/* Perform the page update, and register any extra WAL data */
+		/*
+		 * Perform the page update, dirty and register stack->buffer, and
+		 * register any extra WAL data.
+		 */
 		btree->execPlaceToPage(btree, stack->buffer, stack,
 							   insertdata, updateblkno, ptp_workspace);
 
-		MarkBufferDirty(stack->buffer);
-
 		/* An insert to an internal page finishes the split of the child. */
 		if (BufferIsValid(childbuf))
 		{
 			GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
 			MarkBufferDirty(childbuf);
+			if (RelationNeedsWAL(btree->index) && !btree->isBuild)
+				XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
 		}
 
 		if (RelationNeedsWAL(btree->index) && !btree->isBuild)
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 344e1c5e6b..2494273085 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -721,9 +721,12 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 	/* Apply changes to page */
 	dataPlaceToPageLeafRecompress(buf, leaf);
 
+	MarkBufferDirty(buf);
+
 	/* If needed, register WAL data built by computeLeafRecompressWALData */
 	if (RelationNeedsWAL(btree->index) && !btree->isBuild)
 	{
+		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
 		XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
 	}
 }
@@ -1155,6 +1158,8 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 	pitem = (PostingItem *) insertdata;
 	GinDataPageAddPostingItem(page, pitem, off);
 
+	MarkBufferDirty(buf);
+
 	if (RelationNeedsWAL(btree->index) && !btree->isBuild)
 	{
 		/*
@@ -1167,6 +1172,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 		data.offset = off;
 		data.newitem = *pitem;
 
+		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
 		XLogRegisterBufData(0, (char *) &data,
 							sizeof(ginxlogInsertDataInternal));
 	}
diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c
index 5a8c0eb98d..379496735f 100644
--- a/src/backend/access/gin/ginentrypage.c
+++ b/src/backend/access/gin/ginentrypage.c
@@ -571,6 +571,8 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 		elog(ERROR, "failed to add item to index page in \"%s\"",
 			 RelationGetRelationName(btree->index));
 
+	MarkBufferDirty(buf);
+
 	if (RelationNeedsWAL(btree->index) && !btree->isBuild)
 	{
 		/*
@@ -583,6 +585,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 		data.isDelete = insertData->isDelete;
 		data.offset = off;
 
+		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
 		XLogRegisterBufData(0, (char *) &data,
 							offsetof(ginxlogInsertEntry, tuple));
 		XLogRegisterBufData(0, (char *) insertData->entry,
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index eb6c554831..c8fe7c78a7 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -397,6 +397,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
 		}
 
 		Assert((ptr - collectordata) <= collector->sumsize);
+
+		MarkBufferDirty(buffer);
+
 		if (needWal)
 		{
 			XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
@@ -404,8 +407,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
 		}
 
 		metadata->tailFreeSize = PageGetExactFreeSpace(page);
-
-		MarkBufferDirty(buffer);
 	}
 
 	/*
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fc5d97f606..7a025f33cf 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -824,11 +824,16 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 				XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
 
 				/*
-				 * bucket buffer needs to be registered to ensure that we can
-				 * acquire a cleanup lock on it during replay.
+				 * bucket buffer was not changed, but still needs to be
+				 * registered to ensure that we can acquire a cleanup lock on
+				 * it during replay.
 				 */
 				if (!xlrec.is_primary_bucket_page)
-					XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+				{
+					uint8		flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+					XLogRegisterBuffer(0, bucket_buf, flags);
+				}
 
 				XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
 				XLogRegisterBufData(1, (char *) deletable,
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 39bb2cb9f6..9d1ff20b92 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -658,11 +658,15 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
 		XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
 
 		/*
-		 * bucket buffer needs to be registered to ensure that we can acquire
-		 * a cleanup lock on it during replay.
+		 * bucket buffer was not changed, but still needs to be registered to
+		 * ensure that we can acquire a cleanup lock on it during replay.
 		 */
 		if (!xlrec.is_prim_bucket_same_wrt)
-			XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+		{
+			uint8		flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+			XLogRegisterBuffer(0, bucketbuf, flags);
+		}
 
 		XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
 		if (xlrec.ntups > 0)
@@ -960,11 +964,16 @@ readpage:
 						XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
 
 						/*
-						 * bucket buffer needs to be registered to ensure that
-						 * we can acquire a cleanup lock on it during replay.
+						 * bucket buffer was not changed, but still needs to
+						 * be registered to ensure that we can acquire a
+						 * cleanup lock on it during replay.
 						 */
 						if (!xlrec.is_prim_bucket_same_wrt)
-							XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
+						{
+							int			flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
+
+							XLogRegisterBuffer(0, bucket_buf, flags);
+						}
 
 						XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
 						XLogRegisterBufData(1, (char *) itup_offsets,
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 588626424e..e4aaa551a0 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -248,6 +248,20 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
 	Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
 	Assert(begininsert_called);
 
+	/*
+	 * Ordinarily, buffer should be exclusive-locked and marked dirty before
+	 * we get here, otherwise we could end up violating one of the rules in
+	 * access/transam/README.
+	 *
+	 * Some callers intentionally register a clean page and never update that
+	 * page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to
+	 * bypass these checks.
+	 */
+#ifdef USE_ASSERT_CHECKING
+	if (!(flags & REGBUF_NO_CHANGE))
+		Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));
+#endif
+
 	if (block_id >= max_registered_block_id)
 	{
 		if (block_id >= max_registered_buffers)
@@ -1313,8 +1327,8 @@ log_newpage_range(Relation rel, ForkNumber forknum,
 		START_CRIT_SECTION();
 		for (i = 0; i < nbufs; i++)
 		{
-			XLogRegisterBuffer(i, bufpack[i], flags);
 			MarkBufferDirty(bufpack[i]);
+			XLogRegisterBuffer(i, bufpack[i], flags);
 		}
 
 		recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8b96759b84..21a29f9081 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2098,6 +2098,65 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 	return first_block;
 }
 
+/*
+ * BufferIsExclusiveLocked
+ *
+ *      Checks if buffer is exclusive-locked.
+ *
+ * Buffer must be pinned.
+ */
+bool
+BufferIsExclusiveLocked(Buffer buffer)
+{
+	BufferDesc *bufHdr;
+
+	if (BufferIsLocal(buffer))
+	{
+		int			bufid = -buffer - 1;
+
+		bufHdr = GetLocalBufferDescriptor(bufid);
+	}
+	else
+	{
+		bufHdr = GetBufferDescriptor(buffer - 1);
+	}
+
+	Assert(BufferIsPinned(buffer));
+	return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
+								LW_EXCLUSIVE);
+}
+
+/*
+ * BufferIsDirty
+ *
+ *		Checks if buffer is already dirty.
+ *
+ * Buffer must be pinned and exclusive-locked.  (Without an exclusive lock,
+ * the result may be stale before it's returned.)
+ */
+bool
+BufferIsDirty(Buffer buffer)
+{
+	BufferDesc *bufHdr;
+
+	if (BufferIsLocal(buffer))
+	{
+		int			bufid = -buffer - 1;
+
+		bufHdr = GetLocalBufferDescriptor(bufid);
+	}
+	else
+	{
+		bufHdr = GetBufferDescriptor(buffer - 1);
+	}
+
+	Assert(BufferIsPinned(buffer));
+	Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
+								LW_EXCLUSIVE));
+
+	return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
+}
+
 /*
  * MarkBufferDirty
  *
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 31785dc578..cace867497 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -37,6 +37,7 @@
 									 * will be skipped) */
 #define REGBUF_KEEP_DATA	0x10	/* include data even if a full-page image
 									 * is taken */
+#define REGBUF_NO_CHANGE	0x20	/* intentionally register clean buffer */
 
 /* prototypes for public functions in xloginsert.c: */
 extern void XLogBeginInsert(void);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index d89021f918..9241f86861 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -179,6 +179,8 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
 										bool permanent);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
+extern bool BufferIsExclusiveLocked(Buffer buffer);
+extern bool BufferIsDirty(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
 extern void IncrBufferRefCount(Buffer buffer);
 extern void CheckBufferIsPinnedOnce(Buffer buffer);
-- 
2.34.1

