From 9ed223c36591b9ab777de48e8e3f28e321d3f9d4 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 24 Dec 2024 00:07:38 +0200
Subject: [PATCH v2 2/2] Fix bug in catcache invalidation

If a new entry is added that belongs to a catcache list entry, and cache
invalidation happens while the list entry is being built, the new entry
can be missed.

To fix, change the way we detect concurrent invalidations while a
catcache entry is being built. Keep a stack of entries that are being
built, and apply cache invalidation to those entries in addition to the
real catcache entries.

Discussion: XXX
---
 src/backend/utils/cache/catcache.c | 185 +++++++++++++++++------------
 src/tools/pgindent/typedefs.list   |   1 +
 2 files changed, 112 insertions(+), 74 deletions(-)

diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 4f06b45239c..2332c52c2bd 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -41,6 +41,23 @@
 #include "utils/resowner.h"
 #include "utils/syscache.h"
 
+/*
+ * If a catcache invalidation happens while we are in the middle of creating a
+ * catcache entry (or list), it might apply to the entry we're creating,
+ * making it invalid before it's inserted to the catcache.  To catch such
+ * cases, we have a stack of "create-in-progress" entries. Cache invalidation
+ * marks any matching entries in the stack as dead, in addition to the actual
+ * CatCTup and CatCList entries.
+ */
+typedef struct CatCCreating
+{
+	CatCache   *cache;
+	uint32		hash_value;
+	bool		list;
+	bool		dead;
+	struct CatCCreating *next;
+} CatCCreating;
+static CatCCreating *catcache_creating_stack = NULL;
 
  /* #define CACHEDEBUG */	/* turns DEBUG elogs on */
 
@@ -93,8 +110,7 @@ static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
 static void RehashCatCache(CatCache *cp);
 static void RehashCatCacheLists(CatCache *cp);
 static void CatalogCacheInitializeCache(CatCache *cache);
-static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
-										HeapTuple ntp, SysScanDesc scandesc,
+static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
 										Datum *arguments,
 										uint32 hashValue, Index hashIndex);
 
@@ -662,6 +678,16 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
 			/* could be multiple matches, so keep looking! */
 		}
 	}
+
+	/* Also invalidate any entries that are being built */
+	for (CatCCreating *e = catcache_creating_stack; e != NULL; e = e->next)
+	{
+		if (e->cache == cache)
+		{
+			if (e->list || e->hash_value == hashValue)
+				e->dead = true;
+		}
+	}
 }
 
 /* ----------------------------------------------------------------
@@ -744,6 +770,13 @@ ResetCatalogCache(CatCache *cache)
 #endif
 		}
 	}
+
+	/* Also invalidate any entries that are being built */
+	for (CatCCreating *e = catcache_creating_stack; e != NULL; e = e->next)
+	{
+		if (e->cache == cache)
+			e->dead = true;
+	}
 }
 
 /*
@@ -1493,7 +1526,7 @@ SearchCatCacheMiss(CatCache *cache,
 
 		while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 		{
-			ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+			ct = CatalogCacheCreateEntry(cache, ntp, NULL,
 										 hashValue, hashIndex);
 			/* upon failure, we must start the scan over */
 			if (ct == NULL)
@@ -1528,7 +1561,7 @@ SearchCatCacheMiss(CatCache *cache,
 		if (IsBootstrapProcessingMode())
 			return NULL;
 
-		ct = CatalogCacheCreateEntry(cache, NULL, NULL, arguments,
+		ct = CatalogCacheCreateEntry(cache, NULL, arguments,
 									 hashValue, hashIndex);
 
 		/* Creating a negative cache entry shouldn't fail */
@@ -1665,6 +1698,8 @@ SearchCatCacheList(CatCache *cache,
 	HeapTuple	ntp;
 	MemoryContext oldcxt;
 	int			i;
+	volatile CatCCreating creating_list;
+	bool		first_iter = true;
 
 	/*
 	 * one-time startup overhead for each cache
@@ -1779,12 +1814,25 @@ SearchCatCacheList(CatCache *cache,
 	 */
 	ctlist = NIL;
 
+	/*
+	 * Cache invalidation can happen while we're building the list.
+	 * CatalogCacheCreateEntry() handles concurrent invalidation of individual
+	 * tuples, but it's also possible that a new entry is concurrently added
+	 * that we might miss. Push an "in-progress" entry that will receive the
+	 * invalidation, until we have built the final list entry.
+	 */
+	creating_list.cache = cache;
+	creating_list.hash_value = lHashValue;
+	creating_list.list = true;
+	creating_list.dead = false;
+	creating_list.next = catcache_creating_stack;
+	catcache_creating_stack = (CatCCreating *) &creating_list;
+
 	PG_TRY();
 	{
 		ScanKeyData cur_skey[CATCACHE_MAXKEYS];
 		Relation	relation;
 		SysScanDesc scandesc;
-		bool		stale;
 
 		relation = table_open(cache->cc_reloid, AccessShareLock);
 
@@ -1810,9 +1858,10 @@ SearchCatCacheList(CatCache *cache,
 			/* The list will be ordered iff we are doing an index scan */
 			ordered = (scandesc->irel != NULL);
 
-			stale = false;
+			creating_list.dead = false;
 
-			INJECTION_POINT("catcache-list-miss-systable-scan-started");
+			if (first_iter)
+				INJECTION_POINT("catcache-list-miss-systable-scan-started");
 
 			while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 			{
@@ -1856,30 +1905,13 @@ SearchCatCacheList(CatCache *cache,
 				if (!found)
 				{
 					/* We didn't find a usable entry, so make a new one */
-					ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+					ct = CatalogCacheCreateEntry(cache, ntp, NULL,
 												 hashValue, hashIndex);
+
 					/* upon failure, we must start the scan over */
 					if (ct == NULL)
 					{
-						/*
-						 * Release refcounts on any items we already had.  We
-						 * dare not try to free them if they're now
-						 * unreferenced, since an error while doing that would
-						 * result in the PG_CATCH below doing extra refcount
-						 * decrements.  Besides, we'll likely re-adopt those
-						 * items in the next iteration, so it's not worth
-						 * complicating matters to try to get rid of them.
-						 */
-						foreach(ctlist_item, ctlist)
-						{
-							ct = (CatCTup *) lfirst(ctlist_item);
-							Assert(ct->c_list == NULL);
-							Assert(ct->refcount > 0);
-							ct->refcount--;
-						}
-						/* Reset ctlist in preparation for new try */
-						ctlist = NIL;
-						stale = true;
+						creating_list.dead = true;
 						break;
 					}
 				}
@@ -1891,7 +1923,32 @@ SearchCatCacheList(CatCache *cache,
 			}
 
 			systable_endscan(scandesc);
-		} while (stale);
+
+			/* upon failure, we must start the scan over */
+			if (creating_list.dead)
+			{
+				/*
+				 * Release refcounts on any items we already had.  We dare not
+				 * try to free them if they're now unreferenced, since an
+				 * error while doing that would result in the PG_CATCH below
+				 * doing extra refcount decrements.  Besides, we'll likely
+				 * re-adopt those items in the next iteration, so it's not
+				 * worth complicating matters to try to get rid of them.
+				 */
+				foreach(ctlist_item, ctlist)
+				{
+					ct = (CatCTup *) lfirst(ctlist_item);
+					Assert(ct->c_list == NULL);
+					Assert(ct->refcount > 0);
+					ct->refcount--;
+				}
+				/* Reset ctlist in preparation for new try */
+				ctlist = NIL;
+				first_iter = false;
+				continue;
+			}
+			break;
+		} while (true);
 
 		table_close(relation, AccessShareLock);
 
@@ -1919,6 +1976,7 @@ SearchCatCacheList(CatCache *cache,
 	}
 	PG_CATCH();
 	{
+		catcache_creating_stack = creating_list.next;
 		foreach(ctlist_item, ctlist)
 		{
 			ct = (CatCTup *) lfirst(ctlist_item);
@@ -1937,6 +1995,8 @@ SearchCatCacheList(CatCache *cache,
 		PG_RE_THROW();
 	}
 	PG_END_TRY();
+	catcache_creating_stack = creating_list.next;
+	Assert(!creating_list.dead);
 
 	cl->cl_magic = CL_MAGIC;
 	cl->my_cache = cache;
@@ -2009,23 +2069,6 @@ ReleaseCatCacheListWithOwner(CatCList *list, ResourceOwner resowner)
 }
 
 
-/*
- * equalTuple
- *		Are these tuples memcmp()-equal?
- */
-static bool
-equalTuple(HeapTuple a, HeapTuple b)
-{
-	uint32		alen;
-	uint32		blen;
-
-	alen = a->t_len;
-	blen = b->t_len;
-	return (alen == blen &&
-			memcmp((char *) a->t_data,
-				   (char *) b->t_data, blen) == 0);
-}
-
 /*
  * CatalogCacheCreateEntry
  *		Create a new CatCTup entry, copying the given HeapTuple and other
@@ -2033,17 +2076,16 @@ equalTuple(HeapTuple a, HeapTuple b)
  *
  * To create a normal cache entry, ntp must be the HeapTuple just fetched
  * from scandesc, and "arguments" is not used.  To create a negative cache
- * entry, pass NULL for ntp and scandesc; then "arguments" is the cache
- * keys to use.  In either case, hashValue/hashIndex are the hash values
- * computed from the cache keys.
+ * entry, pass NULL for ntp; then "arguments" is the cache keys to use.
+ * In either case, hashValue/hashIndex are the hash values computed from
+ * the cache keys.
  *
  * Returns NULL if we attempt to detoast the tuple and observe that it
  * became stale.  (This cannot happen for a negative entry.)  Caller must
  * retry the tuple lookup in that case.
  */
 static CatCTup *
-CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
-						Datum *arguments,
+CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
 						uint32 hashValue, Index hashIndex)
 {
 	CatCTup    *ct;
@@ -2076,38 +2118,33 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
 		 */
 		if (HeapTupleHasExternal(ntp))
 		{
-			bool		need_cmp = IsInplaceUpdateOid(cache->cc_reloid);
-			HeapTuple	before = NULL;
-			bool		matches = true;
-
-			if (need_cmp)
-				before = heap_copytuple(ntp);
-			dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
+			volatile CatCCreating creating;
 
 			/*
 			 * The tuple could become stale while we are doing toast table
-			 * access (since AcceptInvalidationMessages can run then).
-			 * equalTuple() detects staleness from inplace updates, while
-			 * systable_recheck_tuple() detects staleness from normal updates.
-			 *
-			 * While this equalTuple() follows the usual rule of reading with
-			 * a pin and no buffer lock, it warrants suspicion since an
-			 * inplace update could appear at any moment.  It's safe because
-			 * the inplace update sends an invalidation that can't reorder
-			 * before the inplace heap change.  If the heap change reaches
-			 * this process just after equalTuple() looks, we've not missed
-			 * its inval.
+			 * access (since AcceptInvalidationMessages can run then). The
+			 * invalidation will mark our 'creating' entry as dead.
 			 */
-			if (need_cmp)
+			creating.cache = cache;
+			creating.hash_value = hashValue;
+			creating.list = false;
+			creating.dead = false;
+			creating.next = catcache_creating_stack;
+			catcache_creating_stack = (CatCCreating *) &creating;
+
+			PG_TRY();
 			{
-				matches = equalTuple(before, ntp);
-				heap_freetuple(before);
+				dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
 			}
-			if (!matches || !systable_recheck_tuple(scandesc, ntp))
+			PG_FINALLY();
 			{
-				heap_freetuple(dtp);
-				return NULL;
+				Assert(catcache_creating_stack == &creating);
+				catcache_creating_stack = creating.next;
 			}
+			PG_END_TRY();
+
+			if (creating.dead)
+				return NULL;
 		}
 		else
 			dtp = ntp;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e1c4f913f84..e1d63776581 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -383,6 +383,7 @@ CaseTestExpr
 CaseWhen
 Cash
 CastInfo
+CatCCreating
 CatCList
 CatCTup
 CatCache
-- 
2.39.5

