From 4515075b644d1e38920eb5bdaaa898e1698510a8 Mon Sep 17 00:00:00 2001
From: Greg Stark <stark@mit.edu>
Date: Tue, 22 Mar 2022 15:51:32 -0400
Subject: [PATCH v4 1/2] Update relfrozenxid when truncating temp tables

    Make ON COMMIT DELETE ROWS reset relfrozenxid and other table stats
    like normal truncate. Otherwise even typical short-lived transactions
    using temporary tables can easily leave them with a relfrozenxid old
    enough to put the database at risk of transaction ID wraparound.

    Also add warnings when old temporary tables are found to still be in
    use during autovacuum. Long lived sessions using temporary tables are
    required to vacuum them themselves.

    For the warning to be useful, modify checkTempNamespaceStatus to
    return the pid of the backend using the namespace so that we can
    tell the superuser which pid to terminate. Otherwise it's quite
    tricky for a user to determine. Rename the function to
    checkTempNamespaceStatusAndPid to avoid an incompatible ABI break.
---
 src/backend/access/transam/varsup.c | 12 ++++---
 src/backend/catalog/heap.c          | 54 +++++++++++++++++++++++++++++
 src/backend/catalog/namespace.c     |  9 +++--
 src/backend/postmaster/autovacuum.c | 48 ++++++++++++++++++-------
 src/include/catalog/namespace.h     |  4 +--
 5 files changed, 107 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 748120a012..8b29573e9f 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -129,14 +129,16 @@ GetNewTransactionId(bool isSubXact)
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
 								oldest_datname),
 						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
 								oldest_datoid),
 						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 		}
 		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
 		{
@@ -149,14 +151,16 @@ GetNewTransactionId(bool isSubXact)
 								oldest_datname,
 								xidWrapLimit - xid),
 						 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 			else
 				ereport(WARNING,
 						(errmsg("database with OID %u must be vacuumed within %u transactions",
 								oldest_datoid,
 								xidWrapLimit - xid),
 						 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
-								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
+								 "You might also need to commit or roll back old prepared transactions,\n"
+								 "drop temporary tables, or drop stale replication slots.")));
 		}
 
 		/* Re-acquire lock and start over */
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 696fd5977e..84d5cc117c 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -30,6 +30,7 @@
 #include "postgres.h"
 
 #include "access/genam.h"
+#include "access/heapam.h"
 #include "access/multixact.h"
 #include "access/relation.h"
 #include "access/table.h"
@@ -70,6 +71,7 @@
 #include "utils/fmgroids.h"
 #include "utils/inval.h"
 #include "utils/lsyscache.h"
+#include "utils/snapmgr.h"
 #include "utils/syscache.h"
 
 
@@ -2980,6 +2982,48 @@ RelationTruncateIndexes(Relation heapRelation)
 	}
 }
 
+/*
+ * Reset relfrozenxid, relminmxid and the relpages/reltuples/relallvisible
+ * stats in rel's pg_class row to the values used when creating tables (see
+ * AddNewRelationTuple() above, vac_update_relstats() in commands/vacuum.c).
+ * This is used after non-transactional truncation.
+ *
+ * This reduces the need for long-running programs to vacuum their own
+ * temporary tables (since they're not covered by autovacuum) at least in the
+ * case where they're ON COMMIT DELETE ROWS.
+ */
+static void
+ResetVacStats(Relation rel)
+{
+	Oid			relid = RelationGetRelid(rel);
+	Relation	classRel;
+	HeapTuple	ctup;
+	Form_pg_class pgcform;
+
+	classRel = table_open(RelationRelationId, RowExclusiveLock);
+
+	/* Fetch a copy of the tuple to scribble on */
+	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+	if (!HeapTupleIsValid(ctup))
+		elog(ERROR, "pg_class entry for relid %u vanished during truncation",
+			 relid);
+	pgcform = (Form_pg_class) GETSTRUCT(ctup);
+
+	/* The relation is empty now, so start over as for a new table */
+	pgcform->relpages = 0;
+	pgcform->reltuples = -1;
+	pgcform->relallvisible = 0;
+	pgcform->relfrozenxid = RecentXmin;
+	pgcform->relminmxid = GetOldestMultiXactId();
+
+	heap_inplace_update(classRel, ctup);
+
+	/* Done with our private copy of the tuple */
+	heap_freetuple(ctup);
+
+	table_close(classRel, RowExclusiveLock);
+}
+
 /*
  *	 heap_truncate
  *
@@ -2988,6 +3032,14 @@ RelationTruncateIndexes(Relation heapRelation)
  * This is not transaction-safe!  There is another, transaction-safe
  * implementation in commands/tablecmds.c.  We now use this only for
  * ON COMMIT truncation of temporary tables, where it doesn't matter.
+ *
+ * Or whenever a table's relfilenode was created within the same transaction
+ * such as when the table was created or truncated (normally) within this
+ * transaction.
+ *
+ * The correctness of this code depends on the fact that the table creation or
+ * truncation would be rolled back *including* the insert/update to the
+ * pg_class row that we update in place here.
  */
 void
 heap_truncate(List *relids)
@@ -3044,6 +3096,7 @@ heap_truncate_one_rel(Relation rel)
 
 	/* Truncate the underlying relation */
 	table_relation_nontransactional_truncate(rel);
+	ResetVacStats(rel);
 
 	/* If the relation has indexes, truncate the indexes too */
 	RelationTruncateIndexes(rel);
@@ -3055,6 +3108,7 @@ heap_truncate_one_rel(Relation rel)
 		Relation	toastrel = table_open(toastrelid, AccessExclusiveLock);
 
 		table_relation_nontransactional_truncate(toastrel);
+		ResetVacStats(toastrel);
 		RelationTruncateIndexes(toastrel);
 		/* keep the lock... */
 		table_close(toastrel, NoLock);
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index fafb9349cc..c1fd3ced95 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -3272,15 +3272,18 @@ isOtherTempNamespace(Oid namespaceId)
 
 /*
  * checkTempNamespaceStatus - is the given namespace owned and actively used
- * by a backend?
+ * by a backend?  If pid is not NULL and the result is TEMP_NAMESPACE_IN_USE,
+ * *pid is set to the PID of the owning backend; otherwise *pid is left
+ * untouched.  That PID is subject to the same staleness noted below.
  *
  * Note: this can be used while scanning relations in pg_class to detect
  * orphaned temporary tables or namespaces with a backend connected to a
  * given database.  The result may be out of date quickly, so the caller
  * must be careful how to handle this information.
+ *
  */
 TempNamespaceStatus
-checkTempNamespaceStatus(Oid namespaceId)
+checkTempNamespaceStatusAndPid(Oid namespaceId, pid_t *pid)
 {
 	PGPROC	   *proc;
 	int			backendId;
@@ -3307,6 +3310,8 @@ checkTempNamespaceStatus(Oid namespaceId)
 		return TEMP_NAMESPACE_IDLE;
 
 	/* Yup, so namespace is busy */
+	if (pid)
+		*pid = proc->pid;
 	return TEMP_NAMESPACE_IN_USE;
 }
 
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 681ef91b81..0329b3b7e7 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2085,6 +2085,8 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		TempNamespaceStatus temp_status;
+		pid_t		temp_pid;
 
 		if (classForm->relkind != RELKIND_RELATION &&
 			classForm->relkind != RELKIND_MATVIEW)
@@ -2092,6 +2094,16 @@ do_autovacuum(void)
 
 		relid = classForm->oid;
 
+		/* Fetch reloptions and the pgstat entry for this table */
+		relopts = extract_autovac_opts(tuple, pg_class_desc);
+		tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
+											 shared, dbentry);
+
+		/* Check if it needs vacuum or analyze */
+		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
+								  effective_multixact_freeze_max_age,
+								  &dovacuum, &doanalyze, &wraparound);
+
 		/*
 		 * Check if it is a temp table (presumably, of some other backend's).
 		 * We cannot safely process other backends' temp tables.
@@ -2103,7 +2115,8 @@ do_autovacuum(void)
 			 * using the temporary schema.  Also, for safety, ignore it if the
 			 * namespace doesn't exist or isn't a temp namespace after all.
 			 */
-			if (checkTempNamespaceStatus(classForm->relnamespace) == TEMP_NAMESPACE_IDLE)
+			temp_status = checkTempNamespaceStatusAndPid(classForm->relnamespace, &temp_pid);
+			if (temp_status == TEMP_NAMESPACE_IDLE)
 			{
 				/*
 				 * The table seems to be orphaned -- although it might be that
@@ -2113,20 +2126,31 @@ do_autovacuum(void)
 				 * Remember it so we can try to delete it later.
 				 */
 				orphan_oids = lappend_oid(orphan_oids, relid);
+			} else if (temp_status == TEMP_NAMESPACE_NOT_TEMP) {
+				elog(LOG, "autovacuum: found temporary table \"%s.%s.%s\" in non-temporary namespace",
+					 get_database_name(MyDatabaseId),
+					 get_namespace_name(classForm->relnamespace),
+					 NameStr(classForm->relname));
+			} else if (temp_status == TEMP_NAMESPACE_IN_USE && wraparound) {
+				/*
+				 * The table is not orphaned, but it needs a wraparound vacuum
+				 * which we cannot do.  Sessions using long-lived temporary
+				 * tables are responsible for vacuuming them; failing to do so
+				 * endangers the whole cluster.
+				 */
+				ereport(LOG,
+						(errmsg("autovacuum: cannot vacuum temporary table \"%s.%s.%s\" in danger of causing transaction wraparound",
+								get_database_name(MyDatabaseId),
+								get_namespace_name(classForm->relnamespace),
+								NameStr(classForm->relname)),
+						 errhint("Long-lived clients must vacuum temporary tables themselves periodically.\n"
+								 "As super-user drop this table or terminate this session with pg_terminate_backend(%d).",
+								 (int) temp_pid)
+							));
 			}
 			continue;
 		}
 
-		/* Fetch reloptions and the pgstat entry for this table */
-		relopts = extract_autovac_opts(tuple, pg_class_desc);
-		tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
-											 shared, dbentry);
-
-		/* Check if it needs vacuum or analyze */
-		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
-								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
-
 		/* Relations that need work are added to table_oids */
 		if (dovacuum || doanalyze)
 			table_oids = lappend_oid(table_oids, relid);
@@ -2273,7 +2297,7 @@ do_autovacuum(void)
 			continue;
 		}
 
-		if (checkTempNamespaceStatus(classForm->relnamespace) != TEMP_NAMESPACE_IDLE)
+		if (checkTempNamespaceStatusAndPid(classForm->relnamespace, NULL) != TEMP_NAMESPACE_IDLE)
 		{
 			UnlockRelationOid(relid, AccessExclusiveLock);
 			continue;
diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h
index f963d82797..a861327cd8 100644
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@@ -39,7 +39,7 @@ typedef struct _FuncCandidateList
 }		   *FuncCandidateList;
 
 /*
- * Result of checkTempNamespaceStatus
+ * Result of checkTempNamespaceStatusAndPid
  */
 typedef enum TempNamespaceStatus
 {
@@ -155,7 +155,7 @@ extern bool isTempToastNamespace(Oid namespaceId);
 extern bool isTempOrTempToastNamespace(Oid namespaceId);
 extern bool isAnyTempNamespace(Oid namespaceId);
 extern bool isOtherTempNamespace(Oid namespaceId);
-extern TempNamespaceStatus checkTempNamespaceStatus(Oid namespaceId);
+extern TempNamespaceStatus checkTempNamespaceStatusAndPid(Oid namespaceId, pid_t *pid);
 extern int	GetTempNamespaceBackendId(Oid namespaceId);
 extern Oid	GetTempToastNamespace(void);
 extern void GetTempNamespaceState(Oid *tempNamespaceId,
-- 
2.35.1

