From 319f4c304f5e9ecb0933c63696fb36205442276b Mon Sep 17 00:00:00 2001
From: Masahiro Ikeda <ikedamsh@oss.nttdata.com>
Date: Fri, 17 Jan 2025 17:46:19 +0900
Subject: [PATCH v1] Add logic to recheck if ANALYZE is needed after VACUUM
 finishes by autovacuum

In cases where VACUUM runs for a long time and frequent data updates
occur, the reported pg_class.reltuples value by VACUUM may become
extremely small, potentially leading to query performance degradation.

This update introduces a mechanism to detect if statistical information
has been updated during a prolonged VACUUM process, allowing ANALYZE to
be executed immediately to mitigate the issue.

Note that query performance degradation may still occur for a short
period before ANALYZE is executed. To fully prevent this, further
changes, such as skipping the update of pg_class.reltuples by VACUUM,
would be necessary.
---
 src/backend/postmaster/autovacuum.c           | 213 ++++++++++++++++--
 .../test_misc/t/008_recheck_autoanalyze.pl    |  91 ++++++++
 2 files changed, 289 insertions(+), 15 deletions(-)
 create mode 100644 src/test/modules/test_misc/t/008_recheck_autoanalyze.pl

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 0ab921a169b..486c354cea3 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -201,6 +201,8 @@ typedef struct autovac_table
 	VacuumParams at_params;
 	double		at_storage_param_vac_cost_delay;
 	int			at_storage_param_vac_cost_limit;
+	float4		anl_scale_factor;
+	int			anl_base_thresh;
 	bool		at_dobalance;
 	bool		at_sharedrel;
 	char	   *at_relname;
@@ -334,12 +336,18 @@ static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts,
 											  Form_pg_class classForm,
 											  int effective_multixact_freeze_max_age,
-											  bool *dovacuum, bool *doanalyze, bool *wraparound);
+											  bool *dovacuum, bool *doanalyze, bool *wraparound,
+											  float4 *anl_scale_factor, int *anl_base_thresh);
+static void recheck_relation_needs_analyze(TupleDesc pg_class_desc, autovac_table *tab);
+
 static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
 									  Form_pg_class classForm,
 									  PgStat_StatTabEntry *tabentry,
 									  int effective_multixact_freeze_max_age,
-									  bool *dovacuum, bool *doanalyze, bool *wraparound);
+									  bool *dovacuum, bool *doanalyze, bool *wraparound,
+									  float4 *anl_scale_factor, int *anl_base_thresh);
+static void relation_needs_analyze(autovac_table *tab, Form_pg_class classForm,
+								   PgStat_StatTabEntry *tabentry, bool *doanalyze);
 
 static void autovacuum_do_vac_analyze(autovac_table *tab,
 									  BufferAccessStrategy bstrategy);
@@ -1997,6 +2005,8 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		float4 anl_scale_factor;
+		int anl_base_thresh;
 
 		if (classForm->relkind != RELKIND_RELATION &&
 			classForm->relkind != RELKIND_MATVIEW)
@@ -2037,7 +2047,8 @@ do_autovacuum(void)
 		/* Check if it needs vacuum or analyze */
 		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
 								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
+								  &dovacuum, &doanalyze, &wraparound,
+								  &anl_scale_factor, &anl_base_thresh);
 
 		/* Relations that need work are added to table_oids */
 		if (dovacuum || doanalyze)
@@ -2090,6 +2101,8 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		float4		anl_scale_factor;
+		int			anl_base_thresh;
 
 		/*
 		 * We cannot safely process other backends' temp tables, so skip 'em.
@@ -2120,7 +2133,8 @@ do_autovacuum(void)
 
 		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
 								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
+								  &dovacuum, &doanalyze, &wraparound,
+								  &anl_scale_factor, &anl_base_thresh);
 
 		/* ignore analyze for toast tables */
 		if (dovacuum)
@@ -2470,6 +2484,80 @@ do_autovacuum(void)
 
 		did_vacuum = true;
 
+		/*
+		 * If only VACUUM was executed, recheck whether the pgstat data
+		 * indicates that ANALYZE is needed for this table.
+		 *
+		 * If VACUUM runs for a long time and frequent data updates occur,
+		 * the reported pg_class.reltuples value may become extremely small,
+		 * potentially leading to query performance degradation. Although
+		 * the next ANALYZE will correct this, the next VACUUM might run
+		 * for a long time again, and the degradation could persist for an
+		 * extended period. This allows ANALYZE to be executed immediately.
+		 */
+		if ((tab->at_params.options & VACOPT_VACUUM) &&
+			!(tab->at_params.options & VACOPT_ANALYZE))
+		{
+#ifdef USE_INJECTION_POINTS
+			char *rel_nspname = get_namespace_name(get_rel_namespace(relid));
+			if (strcmp(rel_nspname, "pg_catalog") != 0 &&
+				strcmp(rel_nspname, "information_schema") != 0 &&
+				strcmp(rel_nspname, "pg_toast") != 0)
+				INJECTION_POINT("before-recheck-autoanalyze");
+#endif
+			/* start transaction to fetch latest relation's relcache entry */
+			CommitTransactionCommand();
+			StartTransactionCommand();
+
+			recheck_relation_needs_analyze(pg_class_desc, tab);
+
+			if (tab->at_params.options & VACOPT_ANALYZE)
+			{
+				/*
+				 * We will abort vacuuming the current table if something errors out,
+				 * and continue with the next one in schedule; in particular, this
+				 * happens if we are interrupted with SIGINT.
+				 */
+				PG_TRY();
+				{
+					/* Use PortalContext for any per-table allocations */
+					MemoryContextSwitchTo(PortalContext);
+
+					/* have at it */
+					autovacuum_do_vac_analyze(tab, bstrategy);
+
+					/*
+					 * Clear a possible query-cancel signal, to avoid a late reaction
+					 * to an automatically-sent signal because of vacuuming the
+					 * current table (we're done with it, so it would make no sense to
+					 * cancel at this point.)
+					 */
+					QueryCancelPending = false;
+				}
+				PG_CATCH();
+				{
+					/*
+					 * Abort the transaction, start a new one, and proceed with the
+					 * next table in our list.
+					 */
+					HOLD_INTERRUPTS();
+					errcontext("automatic analyze of table \"%s.%s.%s\"",
+							   tab->at_datname, tab->at_nspname, tab->at_relname);
+					EmitErrorReport();
+
+					/* this resets ProcGlobal->statusFlags[i] too */
+					AbortOutOfAnyTransaction();
+					FlushErrorState();
+					MemoryContextReset(PortalContext);
+
+					/* restart our transaction for the following operations */
+					StartTransactionCommand();
+					RESUME_INTERRUPTS();
+				}
+				PG_END_TRY();
+			}
+		}
+
 		/* ProcGlobal->statusFlags[i] are reset at the next end of xact */
 
 		/* be tidy */
@@ -2706,6 +2794,49 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
 	return av;
 }
 
+/*
+ * table_recheck_autoanalyze
+ *
+ * Recheck whether a table needs analyze after vacuum completes,
+ * and update tab->at_params.options.
+ */
+static void
+recheck_relation_needs_analyze(TupleDesc pg_class_desc, autovac_table *tab)
+{
+	HeapTuple	classTup;
+	Form_pg_class	classForm;
+	PgStat_StatTabEntry	*tabentry;
+	bool doanalyze = false;
+
+	/* ANALYZE refuses to work with pg_statistic */
+	if (tab->at_relid == StatisticRelationId)
+		return;
+
+	/* fetch the relation's relcache entry */
+	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(tab->at_relid));
+	if (!HeapTupleIsValid(classTup))
+		return;
+	classForm = (Form_pg_class) GETSTRUCT(classTup);
+
+	/* ignore ANALYZE for toast tables */
+	if (classForm->relkind == RELKIND_TOASTVALUE)
+	{
+		heap_freetuple(classTup);
+		return;
+	}
+
+	/* fetch the pgstat table entry */
+	tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared,
+											  tab->at_relid);
+
+	relation_needs_analyze(tab, classForm, tabentry, &doanalyze);
+
+	/* update options */
+	if (doanalyze)
+		tab->at_params.options = VACOPT_ANALYZE;
+
+	heap_freetuple(classTup);
+}
 
 /*
  * table_recheck_autovac
@@ -2727,6 +2858,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 	autovac_table *tab = NULL;
 	bool		wraparound;
 	AutoVacOpts *avopts;
+	float4 anl_scale_factor;
+	int anl_base_thresh;
 
 	/* fetch the relation's relcache entry */
 	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
@@ -2752,7 +2885,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 
 	recheck_relation_needs_vacanalyze(relid, avopts, classForm,
 									  effective_multixact_freeze_max_age,
-									  &dovacuum, &doanalyze, &wraparound);
+									  &dovacuum, &doanalyze, &wraparound,
+									  &anl_scale_factor, &anl_base_thresh);
 
 	/* OK, it needs something done */
 	if (doanalyze || dovacuum)
@@ -2830,6 +2964,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 			avopts->vacuum_cost_limit : 0;
 		tab->at_storage_param_vac_cost_delay = avopts ?
 			avopts->vacuum_cost_delay : -1;
+		tab->anl_scale_factor = anl_scale_factor;
+		tab->anl_base_thresh = anl_base_thresh;
 		tab->at_relname = NULL;
 		tab->at_nspname = NULL;
 		tab->at_datname = NULL;
@@ -2862,7 +2998,9 @@ recheck_relation_needs_vacanalyze(Oid relid,
 								  int effective_multixact_freeze_max_age,
 								  bool *dovacuum,
 								  bool *doanalyze,
-								  bool *wraparound)
+								  bool *wraparound,
+								  float4 *anl_scale_factor,
+								  int *anl_base_thresh)
 {
 	PgStat_StatTabEntry *tabentry;
 
@@ -2872,7 +3010,8 @@ recheck_relation_needs_vacanalyze(Oid relid,
 
 	relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
 							  effective_multixact_freeze_max_age,
-							  dovacuum, doanalyze, wraparound);
+							  dovacuum, doanalyze, wraparound,
+							  anl_scale_factor, anl_base_thresh);
 
 	/* ignore ANALYZE for toast tables */
 	if (classForm->relkind == RELKIND_TOASTVALUE)
@@ -2925,7 +3064,9 @@ relation_needs_vacanalyze(Oid relid,
  /* output params below */
 						  bool *dovacuum,
 						  bool *doanalyze,
-						  bool *wraparound)
+						  bool *wraparound,
+						  float4 *anl_scale_factor,
+						  int *anl_base_thresh)
 {
 	bool		force_vacuum;
 	bool		av_enabled;
@@ -2933,11 +3074,9 @@ relation_needs_vacanalyze(Oid relid,
 
 	/* constants from reloptions or GUC variables */
 	int			vac_base_thresh,
-				vac_ins_base_thresh,
-				anl_base_thresh;
+				vac_ins_base_thresh;
 	float4		vac_scale_factor,
-				vac_ins_scale_factor,
-				anl_scale_factor;
+				vac_ins_scale_factor;
 
 	/* thresholds calculated from above constants */
 	float4		vacthresh,
@@ -2983,11 +3122,11 @@ relation_needs_vacanalyze(Oid relid,
 		? relopts->vacuum_ins_threshold
 		: autovacuum_vac_ins_thresh;
 
-	anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
+	*anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
 		? relopts->analyze_scale_factor
 		: autovacuum_anl_scale;
 
-	anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
+	*anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
 		? relopts->analyze_threshold
 		: autovacuum_anl_thresh;
 
@@ -3048,7 +3187,7 @@ relation_needs_vacanalyze(Oid relid,
 
 		vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
 		vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples;
-		anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
+		anlthresh = (float4) *anl_base_thresh + *anl_scale_factor * reltuples;
 
 		/*
 		 * Note that we don't need to take special consideration for stat
@@ -3085,6 +3224,50 @@ relation_needs_vacanalyze(Oid relid,
 		*doanalyze = false;
 }
 
+/*
+ * relation_needs_analyze
+ *
+ * Check whether a relation needs to be analyzed. It reuses analyze equation
+ * parameters when called from relation_needs_vacanalyze().
+ */
+static void
+relation_needs_analyze(autovac_table *tab,
+					   Form_pg_class classForm,
+					   PgStat_StatTabEntry *tabentry,
+ /* output params below */
+					   bool *doanalyze)
+{
+	/*
+	 * If we found stats for the table, and autovacuum is currently enabled,
+	 * make a threshold-based decision whether to analyze.
+	 */
+	if (PointerIsValid(tabentry) && AutoVacuumingActive())
+	{
+		float4		reltuples;
+		float4		anlthresh;
+		float4		anltuples;
+
+		Assert(classForm != NULL);
+		Assert(OidIsValid(tab->at_relid));
+
+		reltuples = classForm->reltuples;
+		anltuples = tabentry->mod_since_analyze;
+
+		/* Vacuum could update the value to -1 */
+		if (reltuples < 0)
+			reltuples = 0;
+
+		/* Determine if this table needs analyze. */
+		anlthresh = (float4) tab->anl_base_thresh + tab->anl_scale_factor * reltuples;
+		*doanalyze = (anltuples > anlthresh);
+
+		elog(DEBUG3, "%s: anl: %.0f (threshold %.0f)",
+			 NameStr(classForm->relname), anltuples, anlthresh);
+	}
+	else
+		*doanalyze = false;
+}
+
 /*
  * autovacuum_do_vac_analyze
  *		Vacuum and/or analyze the specified table
diff --git a/src/test/modules/test_misc/t/008_recheck_autoanalyze.pl b/src/test/modules/test_misc/t/008_recheck_autoanalyze.pl
new file mode 100644
index 00000000000..bd64ec7bc36
--- /dev/null
+++ b/src/test/modules/test_misc/t/008_recheck_autoanalyze.pl
@@ -0,0 +1,91 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Tests rechecking whether an ANALYZE is required after an autovacuum
+# performs only a VACUUM on a table, even if the table didn't initially
+# require an ANALYZE when the VACUUM started.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# Initialize postgres
+my $psql_err = '';
+my $psql_out = '';
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+
+$node->append_conf(
+	'postgresql.conf', qq[
+# This ensures a quick worker spawn.
+autovacuum_naptime = 1s
+log_autovacuum_min_duration = 0
+]);
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+# From this point, autovacuum worker will wait after vacuuming one heap relation.
+$node->safe_psql('postgres',
+	"SELECT injection_points_attach('before-recheck-autoanalyze', 'wait');");
+
+# Grab location in logs of primary
+my $offset = -s $node->logfile;
+
+# Create a table and insert 70 rows of data, which exceeds the value of
+# autovacuum_vacuum_insert_threshold and doesn't exceeds the value of
+# autovacuum_analyze_threshold.
+$node->safe_psql(
+	'postgres', qq[
+    CREATE TABLE public.test (id int) WITH
+    (autovacuum_vacuum_insert_threshold = 50, autovacuum_analyze_threshold = 100);
+    INSERT INTO public.test SELECT generate_series(1, 70);
+]);
+
+# Wait until an autovacuum worker starts.
+$node->wait_for_event('autovacuum worker', 'before-recheck-autoanalyze');
+
+# And grab one of them.
+my $av_pid = $node->safe_psql(
+	'postgres',
+    "SELECT pid FROM pg_stat_activity "
+  . "WHERE backend_type = 'autovacuum worker' AND wait_event = 'before-recheck-autoanalyze';");
+
+# insert data to exceeds the value of autovacuum_analyze_threshold
+$node->safe_psql('postgres',
+    "INSERT INTO public.test SELECT generate_series(1, 70);");
+
+# Wakeup the injection point.
+$node->safe_psql('postgres',
+	"SELECT injection_points_wakeup('before-recheck-autoanalyze');");
+
+# Wait for the autovacuum worker to exit before scanning the logs.
+$node->poll_query_until('postgres',
+		"SELECT count(*) = 0 FROM pg_stat_activity "
+	  . "WHERE pid = '$av_pid' AND backend_type = 'autovacuum worker';");
+
+# Check that ANALYZE is executed.
+ok($node->log_contains(
+		qr/\[$av_pid\] LOG:  automatic analyze of table "postgres.public.test"/,
+		$offset),
+		"ANALYZE triggered by recheck after vacuum");
+
+# Release injection point.
+$node->safe_psql('postgres',
+	"SELECT injection_points_detach('before-recheck-autoanalyze');");
+
+$node->stop;
+done_testing();
-- 
2.34.1

