From 564b3472099ab9e61d8c108d5c8d1b3d6f5de423 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 19 Jun 2023 16:28:02 +1200
Subject: [PATCH v3 1/3] Fix race in SSI interaction with empty btrees.

When predicate-locking btrees, we have a special case for completely
empty btrees, since there is no page to lock.  This was racy because,
with no buffer lock held, a matching key could be inserted between the
_bt_search() and PredicateLockRelation() calls.

Fix by calling _bt_search() again after taking the relation-level SIREAD
lock, if using SERIALIZABLE isolation and an empty btree is discovered.

Back-patch to all supported releases.

Reported-by: Artem Anisimov <artem.anisimov.255@gmail.com>
Reviewed-by: Dmitry Dolgov <9erthalion6@gmail.com>
Discussion: https://postgr.es/m/17949-a0f17035294a55e2%40postgresql.org
---
 src/backend/access/nbtree/nbtsearch.c | 39 +++++++++++++++++----------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 7e05e58676..2477b7aefb 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1381,23 +1381,34 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	if (!BufferIsValid(buf))
 	{
 		/*
-		 * We only get here if the index is completely empty. Lock relation
-		 * because nothing finer to lock exists.
+		 * Since we have no pages locked, it's possible for another
+		 * transaction to insert data between _bt_search() and
+		 * PredicateLockRelation().  We have to try again after taking a
+		 * relation-level predicate lock, to close a narrow window where we
+		 * wouldn't scan concurrently inserted tuples, but the writer wouldn't
+		 * see our predicate lock.
 		 */
-		PredicateLockRelation(rel, scan->xs_snapshot);
-
-		/*
-		 * mark parallel scan as done, so that all the workers can finish
-		 * their scan
-		 */
-		_bt_parallel_done(scan);
-		BTScanPosInvalidate(so->currPos);
+		if (IsolationIsSerializable())
+		{
+			PredicateLockRelation(rel, scan->xs_snapshot);
+			stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ,
+							   scan->xs_snapshot);
+			_bt_freestack(stack);
+		}
 
-		return false;
+		if (!BufferIsValid(buf))
+		{
+			/*
+			 * Mark parallel scan as done, so that all the workers can finish
+			 * their scan.
+			 */
+			_bt_parallel_done(scan);
+			BTScanPosInvalidate(so->currPos);
+			return false;
+		}
 	}
-	else
-		PredicateLockPage(rel, BufferGetBlockNumber(buf),
-						  scan->xs_snapshot);
+
+	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
 
 	_bt_initialize_more_data(so, dir);
 
-- 
2.40.1

