From 2f3be0eb8754ad1b684d27625bd59183a5511832 Mon Sep 17 00:00:00 2001
From: Vasuki M <vasukianand0119@gmail.com>
Date: Thu, 22 Jan 2026 11:33:10 +0530
Subject: [PATCH] Introduce an opt-in SMART option for ANALYZE that skips
 relations which have not been modified since their last analyze, based on
 pg_stat counters.

A relation is skipped only if:
- it has been analyzed before (manual or auto-analyze), and
- n_mod_since_analyze == 0

Relations that have never been analyzed are always analyzed normally.
The default ANALYZE behavior is unchanged unless SMART is explicitly
specified.

This can reduce unnecessary ANALYZE work in databases with many
mostly-static tables.

Regression tests are included to verify that:
- SMART ANALYZE does not skip never-analyzed tables
- only modified tables are re-analyzed when SMART is used
---
 src/backend/commands/analyze.c              | 30 +++++++++
 src/backend/commands/vacuum.c               | 15 ++++-
 src/include/commands/vacuum.h               |  2 +-
 src/test/regress/expected/analyze_smart.out | 71 +++++++++++++++++++++
 src/test/regress/parallel_schedule          |  1 +
 src/test/regress/sql/analyze_smart.sql      | 58 +++++++++++++++++
 6 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 src/test/regress/expected/analyze_smart.out
 create mode 100644 src/test/regress/sql/analyze_smart.sql

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index a4834241..536c6209 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -45,6 +45,7 @@
 #include "storage/bufmgr.h"
 #include "storage/procarray.h"
 #include "utils/attoptcache.h"
+#include "utils/relcache.h"
 #include "utils/datum.h"
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
@@ -140,6 +141,31 @@ analyze_rel(Oid relid, RangeVar *relation,
 	onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM),
 								  params.log_analyze_min_duration >= 0,
 								  ShareUpdateExclusiveLock);
+	/* SMART ANALYZE: skip unchanged relations that have been analyzed before and
+	 * have not changed since the last analyze.
+	 */
+	if ((params.options & VACOPT_SMART_ANALYZE) &&
+		onerel->rd_rel->relkind == RELKIND_RELATION)
+	{
+		PgStat_StatTabEntry *tabstat;
+
+		tabstat = pgstat_fetch_stat_tabentry(RelationGetRelid(onerel));
+
+		if (tabstat != NULL &&
+			(tabstat->last_analyze_time != 0 ||
+			tabstat->last_autoanalyze_time != 0) &&
+			tabstat->mod_since_analyze == 0)
+		{
+
+			elog(DEBUG1,
+				"SMART ANALYZE: skipping relation \"%s\" (OID %u), no modifications since last analyze",
+				RelationGetRelationName(onerel),
+				RelationGetRelid(onerel));
+
+			table_close(onerel, ShareUpdateExclusiveLock);
+			return;
+		}
+	}
 
 	/* leave if relation could not be opened or locked */
 	if (!onerel)
@@ -314,6 +340,10 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	PgStat_Counter startreadtime = 0;
 	PgStat_Counter startwritetime = 0;
 
+	elog(DEBUG1, "ANALYZE processing relation \"%s\" (OID %u)",
+		RelationGetRelationName(onerel),
+		RelationGetRelid(onerel));
+
 	verbose = (params.options & VACOPT_VERBOSE) != 0;
 	instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
 							  params.log_analyze_min_duration >= 0));
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 03932f45..3e838476 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -165,6 +165,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	BufferAccessStrategy bstrategy = NULL;
 	bool		verbose = false;
 	bool		skip_locked = false;
+	bool            smart = false;
 	bool		analyze = false;
 	bool		freeze = false;
 	bool		full = false;
@@ -229,6 +230,9 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 
 			ring_size = result;
 		}
+		else if (strcmp(opt->defname, "smart") == 0)
+			smart = defGetBoolean(opt);
+
 		else if (!vacstmt->is_vacuumcmd)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -306,6 +310,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		(verbose ? VACOPT_VERBOSE : 0) |
 		(skip_locked ? VACOPT_SKIP_LOCKED : 0) |
 		(analyze ? VACOPT_ANALYZE : 0) |
+		(smart ? VACOPT_SMART_ANALYZE : 0) |
 		(freeze ? VACOPT_FREEZE : 0) |
 		(full ? VACOPT_FULL : 0) |
 		(disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
@@ -315,7 +320,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		(only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
 
 	/* sanity checks on options */
-	Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
+	Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE | VACOPT_SMART_ANALYZE));
 	Assert((params.options & VACOPT_VACUUM) ||
 		   !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
 
@@ -351,6 +356,14 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		}
 	}
 
+	/*
+	 * SMART is only meaningful with ANALYZE.
+	 */
+	if ((params.options & VACOPT_SMART_ANALYZE) &&
+		!(params.options & VACOPT_ANALYZE))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				errmsg("SMART option requires ANALYZE")));
 
 	/*
 	 * Sanity check DISABLE_PAGE_SKIPPING option.
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e885a4b9..08533ec7 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -188,7 +188,7 @@ typedef struct VacAttrStats
 #define VACOPT_DISABLE_PAGE_SKIPPING 0x100	/* don't skip any pages */
 #define VACOPT_SKIP_DATABASE_STATS 0x200	/* skip vac_update_datfrozenxid() */
 #define VACOPT_ONLY_DATABASE_STATS 0x400	/* only vac_update_datfrozenxid() */
-
+#define VACOPT_SMART_ANALYZE   0x00010000  /* skip unchanged relations during ANALYZE */
 /*
  * Values used by index_cleanup and truncate params.
  *
diff --git a/src/test/regress/expected/analyze_smart.out b/src/test/regress/expected/analyze_smart.out
new file mode 100644
index 00000000..4ae555e1
--- /dev/null
+++ b/src/test/regress/expected/analyze_smart.out
@@ -0,0 +1,71 @@
+--
+-- SMART ANALYZE should not skip tables that were never analyzed
+--
+CREATE TABLE sa_never (id int);
+-- Run SMART ANALYZE directly (no prior ANALYZE)
+ANALYZE (SMART) sa_never;
+-- Verify SMART ANALYZE ran or not
+SELECT
+    last_analyze IS NOT NULL AS analyzed,
+    n_mod_since_analyze
+FROM pg_stat_user_tables
+WHERE relname = 'sa_never';
+ analyzed | n_mod_since_analyze 
+----------+---------------------
+ t        |                   0
+(1 row)
+
+--
+-- SMART ANALYZE regression test
+--
+CREATE TABLE sa1 (id int);
+CREATE TABLE sa2 (id int);
+-- Initial analyze so stats exist
+ANALYZE;
+-- Modify only sa1
+INSERT INTO sa1 VALUES (1);
+-- Make sure stats snapshot is fresh
+SELECT pg_stat_clear_snapshot();
+ pg_stat_clear_snapshot 
+------------------------
+ 
+(1 row)
+
+-- Check modifications
+SELECT relname, n_mod_since_analyze
+FROM pg_stat_user_tables
+WHERE relname IN ('sa1', 'sa2')
+ORDER BY relname;
+ relname | n_mod_since_analyze 
+---------+---------------------
+ sa1     |                   0
+ sa2     |                   0
+(2 rows)
+
+-- Run SMART ANALYZE on both tables
+ANALYZE (SMART) sa1, sa2;
+-- Refresh stats again
+SELECT pg_stat_clear_snapshot();
+ pg_stat_clear_snapshot 
+------------------------
+ 
+(1 row)
+
+-- Verify post-conditions:
+-- sa1 has n_mod_since_analyze = 0 because it was analyzed
+-- sa2 has n_mod_since_analyze = 0 because it was skipped and unchanged
+SELECT
+    relname,
+    n_mod_since_analyze = 0 AS reset_after_smart_analyze
+FROM pg_stat_user_tables
+WHERE relname IN ('sa1', 'sa2')
+ORDER BY relname;
+ relname | reset_after_smart_analyze 
+---------+---------------------------
+ sa1     | t
+ sa2     | t
+(2 rows)
+
+DROP TABLE sa1;
+DROP TABLE sa2;
+DROP TABLE sa_never;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 021d57f6..f3379fc5 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -90,6 +90,7 @@ test: rules psql psql_crosstab psql_pipeline amutils stats_ext collate.linux.utf
 test: select_parallel
 test: write_parallel
 test: vacuum_parallel
+test: analyze_smart
 
 # Run this alone, because concurrent DROP TABLE would make non-superuser
 # "ANALYZE;" fail with "relation with OID $n does not exist".
diff --git a/src/test/regress/sql/analyze_smart.sql b/src/test/regress/sql/analyze_smart.sql
new file mode 100644
index 00000000..f7e6733c
--- /dev/null
+++ b/src/test/regress/sql/analyze_smart.sql
@@ -0,0 +1,58 @@
+--
+-- SMART ANALYZE should not skip tables that were never analyzed
+--
+
+CREATE TABLE sa_never (id int);
+
+-- Run SMART ANALYZE directly (no prior ANALYZE)
+ANALYZE (SMART) sa_never;
+
+-- Verify SMART ANALYZE ran or not
+SELECT
+    last_analyze IS NOT NULL AS analyzed,
+    n_mod_since_analyze
+FROM pg_stat_user_tables
+WHERE relname = 'sa_never';
+
+
+--
+-- SMART ANALYZE regression test
+--
+
+CREATE TABLE sa1 (id int);
+CREATE TABLE sa2 (id int);
+
+-- Initial analyze so stats exist
+ANALYZE;
+
+-- Modify only sa1
+INSERT INTO sa1 VALUES (1);
+
+-- Make sure stats snapshot is fresh
+SELECT pg_stat_clear_snapshot();
+
+-- Check modifications
+SELECT relname, n_mod_since_analyze
+FROM pg_stat_user_tables
+WHERE relname IN ('sa1', 'sa2')
+ORDER BY relname;
+
+-- Run SMART ANALYZE on both tables
+ANALYZE (SMART) sa1, sa2;
+
+-- Refresh stats again
+SELECT pg_stat_clear_snapshot();
+
+-- Verify post-conditions:
+-- sa1 has n_mod_since_analyze = 0 because it was analyzed
+-- sa2 has n_mod_since_analyze = 0 because it was skipped and unchanged
+SELECT
+    relname,
+    n_mod_since_analyze = 0 AS reset_after_smart_analyze
+FROM pg_stat_user_tables
+WHERE relname IN ('sa1', 'sa2')
+ORDER BY relname;
+
+DROP TABLE sa1;
+DROP TABLE sa2;
+DROP TABLE sa_never;
-- 
2.43.0

