From 3e46b7c75ff8b93d6b960ffde3222255cbd52066 Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Wed, 11 Dec 2024 19:22:41 +0100
Subject: [PATCH 1/8] Adjust signature of cluster_rel() and its subroutines.

So far cluster_rel() received OID of the relation it should process and it
performed opening and locking of the relation itself. Yet copy_table_data()
received the OID as well and also had to open the relation itself. This patch
tries to eliminate the repeated opening and closing.

One particular reason for this change is that the VACUUM FULL / CLUSTER
command with the CONCURRENTLY option will need to release all locks on the
relation (and possibly on the clustering index) at some point. Since it makes
little sense to keep relation reference w/o lock, the cluster_rel() function
also closes its reference to the relation (and its index). Neither the
function nor its subroutines may open extra references because then it'd be a
bit harder to close them all.
---
 src/backend/commands/cluster.c | 130 ++++++++++++++++-----------------
 src/backend/commands/vacuum.c  |   9 +--
 src/include/commands/cluster.h |   2 +-
 3 files changed, 70 insertions(+), 71 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index ae0863d9a2..7ec605b0bd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -69,8 +69,8 @@ typedef struct
 
 
 static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
-static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
-static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
+static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
@@ -191,13 +191,11 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 								stmt->indexname, stmt->relation->relname)));
 		}
 
+		/* For non-partitioned tables, do what we came here to do. */
 		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
 		{
-			/* close relation, keep lock till commit */
-			table_close(rel, NoLock);
-
-			/* Do the job. */
-			cluster_rel(tableOid, indexOid, &params);
+			cluster_rel(rel, indexOid, &params);
+			/* cluster_rel closes the relation, but keeps lock */
 
 			return;
 		}
@@ -274,6 +272,7 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
 	foreach(lc, rtcs)
 	{
 		RelToCluster *rtc = (RelToCluster *) lfirst(lc);
+		Relation	rel;
 
 		/* Start a new transaction for each relation. */
 		StartTransactionCommand();
@@ -281,8 +280,11 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
 		/* functions in indexes may want a snapshot set */
 		PushActiveSnapshot(GetTransactionSnapshot());
 
-		/* Do the job. */
-		cluster_rel(rtc->tableOid, rtc->indexOid, params);
+		rel = table_open(rtc->tableOid, AccessExclusiveLock);
+
+		/* Process this table */
+		cluster_rel(rel, rtc->indexOid, params);
+		/* cluster_rel closes the relation, but keeps lock */
 
 		PopActiveSnapshot();
 		CommitTransactionCommand();
@@ -295,8 +297,7 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
  * This clusters the table by creating a new, clustered table and
  * swapping the relfilenumbers of the new table and the old table, so
  * the OID of the original table is preserved.  Thus we do not lose
- * GRANT, inheritance nor references to this table (this was a bug
- * in releases through 7.3).
+ * GRANT, inheritance nor references to this table.
  *
  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
  * the new table, it's better to create the indexes afterwards than to fill
@@ -307,14 +308,17 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params)
  * and error messages should refer to the operation as VACUUM not CLUSTER.
  */
 void
-cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
+cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params)
 {
-	Relation	OldHeap;
+	Oid			tableOid = RelationGetRelid(OldHeap);
 	Oid			save_userid;
 	int			save_sec_context;
 	int			save_nestlevel;
 	bool		verbose = ((params->options & CLUOPT_VERBOSE) != 0);
 	bool		recheck = ((params->options & CLUOPT_RECHECK) != 0);
+	Relation	index = NULL;
+
+	Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false));
 
 	/* Check for user-requested abort. */
 	CHECK_FOR_INTERRUPTS();
@@ -327,21 +331,6 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 		pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
 									 PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
 
-	/*
-	 * We grab exclusive access to the target rel and index for the duration
-	 * of the transaction.  (This is redundant for the single-transaction
-	 * case, since cluster() already did it.)  The index lock is taken inside
-	 * check_index_is_clusterable.
-	 */
-	OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
-
-	/* If the table has gone away, we can skip processing it */
-	if (!OldHeap)
-	{
-		pgstat_progress_end_command();
-		return;
-	}
-
 	/*
 	 * Switch to the table owner's userid, so that any index functions are run
 	 * as that user.  Also lock down security-restricted operations and
@@ -444,7 +433,11 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 
 	/* Check heap and index are valid to cluster on */
 	if (OidIsValid(indexOid))
+	{
 		check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
+		/* Open the index (It should already be locked.) */
+		index = index_open(indexOid, NoLock);
+	}
 
 	/*
 	 * Quietly ignore the request if this is a materialized view which has not
@@ -473,9 +466,8 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 	TransferPredicateLocksToHeapRelation(OldHeap);
 
 	/* rebuild_relation does all the dirty work */
-	rebuild_relation(OldHeap, indexOid, verbose);
-
-	/* NB: rebuild_relation does table_close() on OldHeap */
+	rebuild_relation(OldHeap, index, verbose);
+	/* rebuild_relation closes OldHeap, and index if valid */
 
 out:
 	/* Roll back any GUC changes executed by index functions */
@@ -624,44 +616,67 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
  * rebuild_relation: rebuild an existing relation in index or physical order
  *
  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
- * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ * index: index to cluster by, or NULL to rewrite in physical order. Must be
+ * opened and locked.
  *
- * NB: this routine closes OldHeap at the right time; caller should not.
+ * On exit, the heap (and also the index, if one was passed) are closed, but
+ * still locked with AccessExclusiveLock.
  */
 static void
-rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+rebuild_relation(Relation OldHeap, Relation index, bool verbose)
 {
 	Oid			tableOid = RelationGetRelid(OldHeap);
 	Oid			accessMethod = OldHeap->rd_rel->relam;
 	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
 	Oid			OIDNewHeap;
+	Relation	NewHeap;
 	char		relpersistence;
 	bool		is_system_catalog;
 	bool		swap_toast_by_content;
 	TransactionId frozenXid;
 	MultiXactId cutoffMulti;
 
-	if (OidIsValid(indexOid))
+	if (index)
 		/* Mark the correct index as clustered */
-		mark_index_clustered(OldHeap, indexOid, true);
+		mark_index_clustered(OldHeap, RelationGetRelid(index), true);
 
 	/* Remember info about rel before closing OldHeap */
 	relpersistence = OldHeap->rd_rel->relpersistence;
 	is_system_catalog = IsSystemRelation(OldHeap);
 
-	/* Close relcache entry, but keep lock until transaction commit */
-	table_close(OldHeap, NoLock);
-
-	/* Create the transient table that will receive the re-ordered data */
+	/*
+	 * Create the transient table that will receive the re-ordered data.
+	 *
+	 * NoLock for the old heap because we already have it locked and want to
+	 * keep unlocking straightforward. The new heap (and its TOAST if one
+	 * exists) will be locked in AccessExclusiveMode on return. Since others
+	 * can't see it yet, we do not care.
+	 */
 	OIDNewHeap = make_new_heap(tableOid, tableSpace,
 							   accessMethod,
 							   relpersistence,
-							   AccessExclusiveLock);
+							   NoLock);
+	NewHeap = table_open(OIDNewHeap, NoLock);
+	/* NewHeap already locked by make_new_heap */
+	Assert(CheckRelationLockedByMe(NewHeap, AccessExclusiveLock, false));
 
 	/* Copy the heap data into the new table in the desired order */
-	copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+	copy_table_data(NewHeap, OldHeap, index, verbose,
 					&swap_toast_by_content, &frozenXid, &cutoffMulti);
 
+
+	/* Close relcache entries, but keep lock until transaction commit */
+	table_close(OldHeap, NoLock);
+	if (index)
+		index_close(index, NoLock);
+
+	/*
+	 * Close the new relation so it can be dropped as soon as the storage is
+	 * swapped. The relation is not visible to others, so no need to unlock it
+	 * explicitly.
+	 */
+	table_close(NewHeap, NoLock);
+
 	/*
 	 * Swap the physical files of the target and transient tables, then
 	 * rebuild the target's indexes and throw away the transient table.
@@ -810,13 +825,10 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
 				bool *pSwapToastByContent, TransactionId *pFreezeXid,
 				MultiXactId *pCutoffMulti)
 {
-	Relation	NewHeap,
-				OldHeap,
-				OldIndex;
 	Relation	relRelation;
 	HeapTuple	reltup;
 	Form_pg_class relform;
@@ -835,16 +847,6 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 
 	pg_rusage_init(&ru0);
 
-	/*
-	 * Open the relations we need.
-	 */
-	NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
-	OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
-	if (OidIsValid(OIDOldIndex))
-		OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
-	else
-		OldIndex = NULL;
-
 	/* Store a copy of the namespace name for logging purposes */
 	nspname = get_namespace_name(RelationGetNamespace(OldHeap));
 
@@ -945,7 +947,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 	 * provided, else plain seqscan.
 	 */
 	if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
-		use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
+		use_sort = plan_cluster_use_sort(RelationGetRelid(OldHeap),
+										 RelationGetRelid(OldIndex));
 	else
 		use_sort = false;
 
@@ -1000,24 +1003,21 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 					   tups_recently_dead,
 					   pg_rusage_show(&ru0))));
 
-	if (OldIndex != NULL)
-		index_close(OldIndex, NoLock);
-	table_close(OldHeap, NoLock);
-	table_close(NewHeap, NoLock);
-
 	/* Update pg_class to reflect the correct values of pages and tuples. */
 	relRelation = table_open(RelationRelationId, RowExclusiveLock);
 
-	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
+	reltup = SearchSysCacheCopy1(RELOID,
+								 ObjectIdGetDatum(RelationGetRelid(NewHeap)));
 	if (!HeapTupleIsValid(reltup))
-		elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
+		elog(ERROR, "cache lookup failed for relation %u",
+			 RelationGetRelid(NewHeap));
 	relform = (Form_pg_class) GETSTRUCT(reltup);
 
 	relform->relpages = num_pages;
 	relform->reltuples = num_tuples;
 
 	/* Don't update the stats for pg_class.  See swap_relation_files. */
-	if (OIDOldHeap != RelationRelationId)
+	if (RelationGetRelid(OldHeap) != RelationRelationId)
 		CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
 	else
 		CacheInvalidateRelcacheByTuple(reltup);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index bb639ef51f..a0158b1fcd 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2218,15 +2218,14 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 		{
 			ClusterParams cluster_params = {0};
 
-			/* close relation before vacuuming, but hold lock until commit */
-			relation_close(rel, NoLock);
-			rel = NULL;
-
 			if ((params->options & VACOPT_VERBOSE) != 0)
 				cluster_params.options |= CLUOPT_VERBOSE;
 
 			/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
-			cluster_rel(relid, InvalidOid, &cluster_params);
+			cluster_rel(rel, InvalidOid, &cluster_params);
+			/* cluster_rel closes the relation, but keeps lock */
+
+			rel = NULL;
 		}
 		else
 			table_relation_vacuum(rel, params, bstrategy);
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index 4e32380417..2d8e363015 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -32,7 +32,7 @@ typedef struct ClusterParams
 } ClusterParams;
 
 extern void cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel);
-extern void cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params);
+extern void cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params);
 extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
 									   LOCKMODE lockmode);
 extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal);
-- 
2.45.2

