From 2e5d2c47288d27a620af09214c82fd66f61fb605 Mon Sep 17 00:00:00 2001
From: Greg Stark <stark@mit.edu>
Date: Thu, 31 Mar 2022 15:48:38 -0400
Subject: [PATCH v6 1/3] Add warnings when old temporary tables are found to
 still be in use during autovacuum. Long-lived sessions using temporary tables
 are required to vacuum them themselves.

For the warning to be useful, modify checkTempNamespaceStatus to also
return the pid of the backend using the namespace, so that we can tell
the superuser which pid to terminate; otherwise that is quite tricky for
a user to determine. Rename the function to
checkTempNamespaceStatusAndPid to avoid an incompatible ABI break.
---
 src/backend/access/transam/varsup.c | 12 ++++---
 src/backend/catalog/namespace.c     |  9 +++--
 src/backend/postmaster/autovacuum.c | 52 ++++++++++++++++++++++-------
 src/include/catalog/namespace.h     |  4 +--
 4 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 748120a012..8b29573e9f 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -129,14 +129,16 @@ GetNewTransactionId(bool isSubXact)
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
 								oldest_datname),
 						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
 								oldest_datoid),
 						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 		}
 		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
 		{
@@ -149,14 +151,16 @@ GetNewTransactionId(bool isSubXact)
 								oldest_datname,
 								xidWrapLimit - xid),
 						 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 			else
 				ereport(WARNING,
 						(errmsg("database with OID %u must be vacuumed within %u transactions",
 								oldest_datoid,
 								xidWrapLimit - xid),
 						 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 		}
 
 		/* Re-acquire lock and start over */
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index fafb9349cc..c1fd3ced95 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -3272,15 +3272,18 @@ isOtherTempNamespace(Oid namespaceId)
 
 /*
  * checkTempNamespaceStatus - is the given namespace owned and actively used
- * by a backend?
+ * by a backend?  If pid is not NULL, *pid is set to the owning backend's pid,
+ * but only when the result is TEMP_NAMESPACE_IN_USE; otherwise *pid is left
+ * untouched.  See the note below: the result may already be out of date.
  *
  * Note: this can be used while scanning relations in pg_class to detect
  * orphaned temporary tables or namespaces with a backend connected to a
  * given database.  The result may be out of date quickly, so the caller
  * must be careful how to handle this information.
+ *
  */
 TempNamespaceStatus
-checkTempNamespaceStatus(Oid namespaceId)
+checkTempNamespaceStatusAndPid(Oid namespaceId, pid_t *pid)
 {
 	PGPROC	   *proc;
 	int			backendId;
@@ -3307,6 +3310,8 @@ checkTempNamespaceStatus(Oid namespaceId)
 		return TEMP_NAMESPACE_IDLE;
 
 	/* Yup, so namespace is busy */
+	if (pid)
+		*pid = proc->pid;
 	return TEMP_NAMESPACE_IN_USE;
 }
 
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 681ef91b81..19e569a693 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2085,6 +2085,8 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		TempNamespaceStatus temp_status;
+		pid_t		temp_pid;
 
 		if (classForm->relkind != RELKIND_RELATION &&
 			classForm->relkind != RELKIND_MATVIEW)
@@ -2092,6 +2094,16 @@ do_autovacuum(void)
 
 		relid = classForm->oid;
 
+		/* Fetch reloptions and the pgstat entry for this table */
+		relopts = extract_autovac_opts(tuple, pg_class_desc);
+		tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
+											 shared, dbentry);
+
+		/* Check if it needs vacuum or analyze */
+		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
+								  effective_multixact_freeze_max_age,
+								  &dovacuum, &doanalyze, &wraparound);
+
 		/*
 		 * Check if it is a temp table (presumably, of some other backend's).
 		 * We cannot safely process other backends' temp tables.
@@ -2103,7 +2115,8 @@ do_autovacuum(void)
 			 * using the temporary schema.  Also, for safety, ignore it if the
 			 * namespace doesn't exist or isn't a temp namespace after all.
 			 */
-			if (checkTempNamespaceStatus(classForm->relnamespace) == TEMP_NAMESPACE_IDLE)
+			temp_status = checkTempNamespaceStatusAndPid(classForm->relnamespace, &temp_pid);
+			if (temp_status == TEMP_NAMESPACE_IDLE)
 			{
 				/*
 				 * The table seems to be orphaned -- although it might be that
@@ -2114,19 +2127,34 @@ do_autovacuum(void)
 				 */
 				orphan_oids = lappend_oid(orphan_oids, relid);
 			}
+			else if (temp_status == TEMP_NAMESPACE_NOT_TEMP)
+			{
+				elog(LOG, "autovacuum: found temporary table \"%s.%s.%s\" in non-temporary namespace",
+					 get_database_name(MyDatabaseId),
+					 get_namespace_name(classForm->relnamespace),
+					 NameStr(classForm->relname));
+			}
+			else if (temp_status == TEMP_NAMESPACE_IN_USE && wraparound)
+			{
+				/* The table is not orphaned, but it seems to need a
+				 * wraparound vacuum, which we cannot do because we cannot
+				 * safely process other backends' temp tables. Sessions using
+				 * long-lived temporary tables are responsible for vacuuming
+				 * them; failing to do so endangers the whole cluster.
+				 */
+				ereport(LOG,
+						(errmsg("autovacuum: cannot vacuum temporary table \"%s.%s.%s\" in danger of causing transaction wraparound",
+								get_database_name(MyDatabaseId),
+								get_namespace_name(classForm->relnamespace),
+								NameStr(classForm->relname)),
+						 errhint("Long-lived clients must vacuum temporary tables themselves periodically.\n"
+								 "As super-user drop this table or terminate this session with pg_terminate_backend(%lu).",
+								 (unsigned long)temp_pid)
+							));
+			}
 			continue;
 		}
 
-		/* Fetch reloptions and the pgstat entry for this table */
-		relopts = extract_autovac_opts(tuple, pg_class_desc);
-		tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
-											 shared, dbentry);
-
-		/* Check if it needs vacuum or analyze */
-		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
-								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
-
 		/* Relations that need work are added to table_oids */
 		if (dovacuum || doanalyze)
 			table_oids = lappend_oid(table_oids, relid);
@@ -2273,7 +2301,7 @@ do_autovacuum(void)
 			continue;
 		}
 
-		if (checkTempNamespaceStatus(classForm->relnamespace) != TEMP_NAMESPACE_IDLE)
+		if (checkTempNamespaceStatusAndPid(classForm->relnamespace, NULL) != TEMP_NAMESPACE_IDLE)
 		{
 			UnlockRelationOid(relid, AccessExclusiveLock);
 			continue;
diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h
index f963d82797..a861327cd8 100644
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@@ -39,7 +39,7 @@ typedef struct _FuncCandidateList
 }		   *FuncCandidateList;
 
 /*
- * Result of checkTempNamespaceStatus
+ * Result of checkTempNamespaceStatusAndPid
  */
 typedef enum TempNamespaceStatus
 {
@@ -155,7 +155,7 @@ extern bool isTempToastNamespace(Oid namespaceId);
 extern bool isTempOrTempToastNamespace(Oid namespaceId);
 extern bool isAnyTempNamespace(Oid namespaceId);
 extern bool isOtherTempNamespace(Oid namespaceId);
-extern TempNamespaceStatus checkTempNamespaceStatus(Oid namespaceId);
+extern TempNamespaceStatus checkTempNamespaceStatusAndPid(Oid namespaceId, pid_t *pid);
 extern int	GetTempNamespaceBackendId(Oid namespaceId);
 extern Oid	GetTempToastNamespace(void);
 extern void GetTempNamespaceState(Oid *tempNamespaceId,
-- 
2.35.1

