From c2e29150e923f9782ce24a7a4e7d6f2d7445b543 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Fri, 4 Oct 2024 17:06:04 -0400
Subject: [PATCH v3 2/2] Trigger more frequent autovacuums with relallfrozen

Calculate the insert threshold for triggering an autovacuum of a
relation based on the number of unfrozen pages. By only considering the
"active" (unfrozen) portion of the table when calculating how many
tuples to add to the insert threshold, we can trigger more frequent
vacuums of insert-heavy tables and increase the chances of vacuuming
those pages when they still reside in shared buffers.

Author: Melanie Plageman
Reviewed-by: Greg Sabino Mullane
---
 doc/src/sgml/config.sgml                      |  4 +-
 src/backend/postmaster/autovacuum.c           | 37 ++++++++++++++++++-
 src/backend/utils/misc/postgresql.conf.sample |  4 +-
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index dc401087dc6..3bd22f7f1e7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8710,10 +8710,10 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </term>
       <listitem>
        <para>
-        Specifies a fraction of the table size to add to
+        Specifies a fraction of the active (unfrozen) table size to add to
         <varname>autovacuum_vacuum_insert_threshold</varname>
         when deciding whether to trigger a <command>VACUUM</command>.
-        The default is 0.2 (20% of table size).
+        The default is 0.2 (20% of active table size).
         This parameter can only be set in the <filename>postgresql.conf</filename>
         file or on the server command line;
         but the setting can be overridden for individual tables by
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index dc3cf87abab..364b46f672d 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2922,7 +2922,12 @@ relation_needs_vacanalyze(Oid relid,
 {
 	bool		force_vacuum;
 	bool		av_enabled;
-	float4		reltuples;		/* pg_class.reltuples */
+
+	/* From pg_class */
+	float4		reltuples;
+	int32		relpages;
+	int32		relallfrozen;
+	int32		relallvisible;
 
 	/* constants from reloptions or GUC variables */
 	int			vac_base_thresh,
@@ -3030,6 +3035,10 @@ relation_needs_vacanalyze(Oid relid,
 	 */
 	if (PointerIsValid(tabentry) && AutoVacuumingActive())
 	{
+		float4		pcnt_unfrozen = 1;
+
+		relpages = classForm->relpages;
+		relallfrozen = classForm->relallfrozen;
 		reltuples = classForm->reltuples;
 		vactuples = tabentry->dead_tuples;
 		instuples = tabentry->ins_since_vacuum;
@@ -3039,8 +3048,32 @@ relation_needs_vacanalyze(Oid relid,
 		if (reltuples < 0)
 			reltuples = 0;
 
+		/*
+		 * If the table has been vacuumed and we have reliable data for
+		 * relallfrozen and relallvisible, calculate the unfrozen percentage
+		 * of the table to modify insert scale factor. This helps us decide
+		 * whether or not to vacuum an insert-heavy table based on the number
+		 * of inserts to the "active" part of the table.
+		 *
+		 * If relallfrozen is -1, that means relallvisible was updated
+		 * manually and we can't rely on relallfrozen.
+		 */
+		if (relpages > 0 && reltuples > 0 && relallfrozen > -1)
+		{
+			relallvisible = classForm->relallvisible;
+
+			if (relallvisible > relpages)
+				relallvisible = relpages;
+
+			if (relallfrozen > relallvisible)
+				relallfrozen = relallvisible;
+
+			pcnt_unfrozen = 1 - ((float4) relallfrozen / relpages);
+		}
+
 		vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
-		vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples;
+		vacinsthresh = (float4) vac_ins_base_thresh +
+			vac_ins_scale_factor * reltuples * pcnt_unfrozen;
 		anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
 
 		/*
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 39a3ac23127..a66f16e838a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -670,8 +670,8 @@
 #autovacuum_analyze_threshold = 50	# min number of row updates before
 					# analyze
 #autovacuum_vacuum_scale_factor = 0.2	# fraction of table size before vacuum
-#autovacuum_vacuum_insert_scale_factor = 0.2	# fraction of inserts over table
-						# size before insert vacuum
+#autovacuum_vacuum_insert_scale_factor = 0.2	# fraction of inserts over active
+						# table size before insert vacuum
 #autovacuum_analyze_scale_factor = 0.1	# fraction of table size before analyze
 #autovacuum_freeze_max_age = 200000000	# maximum XID age before forced vacuum
 					# (change requires restart)
-- 
2.34.1

