From 5d2f5e092ef326f72100d6d47ba1b5cb207e62ba Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 29 Mar 2025 14:50:45 -0400
Subject: [PATCH v10 1/6] Support cached plans that work from a parse-analyzed
 Query.

Up to now, plancache.c dealt only with raw parse trees as the
starting point for a cached plan.  However, we'd like to use
this infrastructure for SQL functions, and in the case of a
new-style SQL function we'll only have the stored querytree,
which corresponds to an analyzed-but-not-rewritten Query.

Fortunately, we can make plancache.c handle that scenario
with only minor modifications; the biggest change is in
RevalidateCachedQuery(), where we will need to apply only
pg_rewrite_query, not pg_analyze_and_rewrite.

This patch just installs the infrastructure; there's no
caller as yet.

Author: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/8216639.NyiUUSuA9g@aivenlaptop
---
 src/backend/parser/analyze.c        |  39 +++++++
 src/backend/utils/cache/plancache.c | 158 +++++++++++++++++++++-------
 src/include/parser/analyze.h        |   1 +
 src/include/utils/plancache.h       |  23 +++-
 4 files changed, 179 insertions(+), 42 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 76f58b3aca3..1f4d6adda52 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -591,6 +591,45 @@ analyze_requires_snapshot(RawStmt *parseTree)
 	return stmt_requires_parse_analysis(parseTree);
 }
 
+/*
+ * query_requires_rewrite_plan()
+ *		Returns true if rewriting or planning is non-trivial for this Query.
+ *
+ * This is much like stmt_requires_parse_analysis(), but applies one step
+ * further down the pipeline: the input is a Query, not a RawStmt.
+ *
+ * We do not provide an equivalent of analyze_requires_snapshot(): callers
+ * can assume that any rewriting or planning activity needs a snapshot.
+ */
+bool
+query_requires_rewrite_plan(Query *query)
+{
+	bool		result;
+
+	if (query->commandType != CMD_UTILITY)
+	{
+		/* All optimizable statements require rewriting/planning */
+		result = true;
+	}
+	else
+	{
+		/* Keep this list in sync with stmt_requires_parse_analysis() */
+		switch (nodeTag(query->utilityStmt))
+		{
+			case T_DeclareCursorStmt:
+			case T_ExplainStmt:
+			case T_CreateTableAsStmt:
+			case T_CallStmt:
+				result = true;
+				break;
+			default:
+				result = false;
+				break;
+		}
+	}
+	return result;
+}
+
 /*
  * transformDeleteStmt -
  *	  transforms a Delete Statement
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index 6c2979d5c82..5983927a4c2 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -14,7 +14,7 @@
  * Cache invalidation is driven off sinval events.  Any CachedPlanSource
  * that matches the event is marked invalid, as is its generic CachedPlan
  * if it has one.  When (and if) the next demand for a cached plan occurs,
- * parse analysis and rewrite is repeated to build a new valid query tree,
+ * parse analysis and/or rewrite is repeated to build a new valid query tree,
  * and then planning is performed as normal.  We also force re-analysis and
  * re-planning if the active search_path is different from the previous time
  * or, if RLS is involved, if the user changes or the RLS environment changes.
@@ -63,6 +63,7 @@
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "parser/analyze.h"
+#include "rewrite/rewriteHandler.h"
 #include "storage/lmgr.h"
 #include "tcop/pquery.h"
 #include "tcop/utility.h"
@@ -74,18 +75,6 @@
 #include "utils/syscache.h"
 
 
-/*
- * We must skip "overhead" operations that involve database access when the
- * cached plan's subject statement is a transaction control command or one
- * that requires a snapshot not to be set yet (such as SET or LOCK).  More
- * generally, statements that do not require parse analysis/rewrite/plan
- * activity never need to be revalidated, so we can treat them all like that.
- * For the convenience of postgres.c, treat empty statements that way too.
- */
-#define StmtPlanRequiresRevalidation(plansource)  \
-	((plansource)->raw_parse_tree != NULL && \
-	 stmt_requires_parse_analysis((plansource)->raw_parse_tree))
-
 /*
  * This is the head of the backend's list of "saved" CachedPlanSources (i.e.,
  * those that are in long-lived storage and are examined for sinval events).
@@ -100,6 +89,8 @@ static dlist_head saved_plan_list = DLIST_STATIC_INIT(saved_plan_list);
 static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_list);
 
 static void ReleaseGenericPlan(CachedPlanSource *plansource);
+static bool StmtPlanRequiresRevalidation(CachedPlanSource *plansource);
+static bool BuildingPlanRequiresSnapshot(CachedPlanSource *plansource);
 static List *RevalidateCachedQuery(CachedPlanSource *plansource,
 								   QueryEnvironment *queryEnv,
 								   bool release_generic);
@@ -166,7 +157,7 @@ InitPlanCache(void)
 }
 
 /*
- * CreateCachedPlan: initially create a plan cache entry.
+ * CreateCachedPlan: initially create a plan cache entry for a raw parse tree.
  *
  * Creation of a cached plan is divided into two steps, CreateCachedPlan and
  * CompleteCachedPlan.  CreateCachedPlan should be called after running the
@@ -220,6 +211,7 @@ CreateCachedPlan(RawStmt *raw_parse_tree,
 	plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource));
 	plansource->magic = CACHEDPLANSOURCE_MAGIC;
 	plansource->raw_parse_tree = copyObject(raw_parse_tree);
+	plansource->analyzed_parse_tree = NULL;
 	plansource->query_string = pstrdup(query_string);
 	MemoryContextSetIdentifier(source_context, plansource->query_string);
 	plansource->commandTag = commandTag;
@@ -255,6 +247,34 @@ CreateCachedPlan(RawStmt *raw_parse_tree,
 	return plansource;
 }
 
+/*
+ * CreateCachedPlanForQuery: initially create a plan cache entry for a Query.
+ *
+ * This is used in the same way as CreateCachedPlan, except that the source
+ * query has already been through parse analysis, and the plancache will never
+ * try to re-do that step.  The passed-in Query is copied.
+ *
+ * Currently this is used only for new-style SQL functions, where we have a
+ * Query from the function's prosqlbody, but no source text.  The query_string
+ * is typically empty, but is required anyway.
+ */
+CachedPlanSource *
+CreateCachedPlanForQuery(Query *analyzed_parse_tree,
+						 const char *query_string,
+						 CommandTag commandTag)
+{
+	CachedPlanSource *plansource;
+	MemoryContext oldcxt;
+
+	/* Rather than duplicating CreateCachedPlan, call it with no raw tree: */
+	plansource = CreateCachedPlan(NULL, query_string, commandTag);
+	oldcxt = MemoryContextSwitchTo(plansource->context);
+	plansource->analyzed_parse_tree = copyObject(analyzed_parse_tree);
+	MemoryContextSwitchTo(oldcxt);
+
+	return plansource;
+}
+
 /*
  * CreateOneShotCachedPlan: initially create a one-shot plan cache entry.
  *
@@ -289,6 +309,7 @@ CreateOneShotCachedPlan(RawStmt *raw_parse_tree,
 	plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource));
 	plansource->magic = CACHEDPLANSOURCE_MAGIC;
 	plansource->raw_parse_tree = raw_parse_tree;
+	plansource->analyzed_parse_tree = NULL;
 	plansource->query_string = query_string;
 	plansource->commandTag = commandTag;
 	plansource->param_types = NULL;
@@ -566,6 +587,42 @@ ReleaseGenericPlan(CachedPlanSource *plansource)
 	}
 }
 
+/*
+ * Returns true if the cached statement may ever need revalidation.  We must
+ * skip "overhead" operations that involve database access when the subject
+ * statement is a transaction control command or one that requires a snapshot
+ * not to be set yet (such as SET or LOCK).  More generally, statements that
+ * need no parse analysis/rewrite/plan activity never need revalidation.  For
+ * the convenience of postgres.c, treat empty statements that way too.
+ */
+static bool
+StmtPlanRequiresRevalidation(CachedPlanSource *plansource)
+{
+	if (plansource->raw_parse_tree != NULL)
+		return stmt_requires_parse_analysis(plansource->raw_parse_tree);
+	else if (plansource->analyzed_parse_tree != NULL)
+		return query_requires_rewrite_plan(plansource->analyzed_parse_tree);
+	/* both trees NULL: empty query, never needs revalidation */
+	return false;
+}
+
+/*
+ * Determine if creating a plan for this CachedPlanSource requires a snapshot.
+ * At present this gives the same answer as StmtPlanRequiresRevalidation(),
+ * but we want to preserve the distinction between
+ * stmt_requires_parse_analysis() and analyze_requires_snapshot().
+ */
+static bool
+BuildingPlanRequiresSnapshot(CachedPlanSource *plansource)
+{
+	if (plansource->raw_parse_tree != NULL)
+		return analyze_requires_snapshot(plansource->raw_parse_tree);
+	else if (plansource->analyzed_parse_tree != NULL)
+		return query_requires_rewrite_plan(plansource->analyzed_parse_tree);
+	/* both trees NULL: empty query, never needs a snapshot */
+	return false;
+}
+
 /*
  * RevalidateCachedQuery: ensure validity of analyzed-and-rewritten query tree.
  *
@@ -592,7 +649,6 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 					  bool release_generic)
 {
 	bool		snapshot_set;
-	RawStmt    *rawtree;
 	List	   *tlist;			/* transient query-tree list */
 	List	   *qlist;			/* permanent query-tree list */
 	TupleDesc	resultDesc;
@@ -615,7 +671,10 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 	/*
 	 * If the query is currently valid, we should have a saved search_path ---
 	 * check to see if that matches the current environment.  If not, we want
-	 * to force replan.
+	 * to force replan.  (We could almost ignore this consideration when
+	 * working from an analyzed parse tree; but there are scenarios where
+	 * planning can have search_path-dependent results, for example if it
+	 * inlines an old-style SQL function.)
 	 */
 	if (plansource->is_valid)
 	{
@@ -662,9 +721,9 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 	}
 
 	/*
-	 * Discard the no-longer-useful query tree.  (Note: we don't want to do
-	 * this any earlier, else we'd not have been able to release locks
-	 * correctly in the race condition case.)
+	 * Discard the no-longer-useful rewritten query tree.  (Note: we don't
+	 * want to do this any earlier, else we'd not have been able to release
+	 * locks correctly in the race condition case.)
 	 */
 	plansource->is_valid = false;
 	plansource->query_list = NIL;
@@ -711,25 +770,48 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 	}
 
 	/*
-	 * Run parse analysis and rule rewriting.  The parser tends to scribble on
-	 * its input, so we must copy the raw parse tree to prevent corruption of
-	 * the cache.
+	 * Run parse analysis (if needed) and rule rewriting.
 	 */
-	rawtree = copyObject(plansource->raw_parse_tree);
-	if (rawtree == NULL)
-		tlist = NIL;
-	else if (plansource->parserSetup != NULL)
-		tlist = pg_analyze_and_rewrite_withcb(rawtree,
-											  plansource->query_string,
-											  plansource->parserSetup,
-											  plansource->parserSetupArg,
-											  queryEnv);
+	if (plansource->raw_parse_tree != NULL)
+	{
+		/* Source is raw parse tree */
+		RawStmt    *rawtree;
+
+		/*
+		 * The parser tends to scribble on its input, so we must copy the raw
+		 * parse tree to prevent corruption of the cache.
+		 */
+		rawtree = copyObject(plansource->raw_parse_tree);
+		if (plansource->parserSetup != NULL)
+			tlist = pg_analyze_and_rewrite_withcb(rawtree,
+												  plansource->query_string,
+												  plansource->parserSetup,
+												  plansource->parserSetupArg,
+												  queryEnv);
+		else
+			tlist = pg_analyze_and_rewrite_fixedparams(rawtree,
+													   plansource->query_string,
+													   plansource->param_types,
+													   plansource->num_params,
+													   queryEnv);
+	}
+	else if (plansource->analyzed_parse_tree != NULL)
+	{
+		/* Source is pre-analyzed query, so we only need to rewrite */
+		Query	   *analyzed_tree;
+
+		/* The rewriter scribbles on its input, too, so copy */
+		analyzed_tree = copyObject(plansource->analyzed_parse_tree);
+		/* Acquire locks needed before rewriting ... */
+		AcquireRewriteLocks(analyzed_tree, true, false);
+		/* ... and do it */
+		tlist = pg_rewrite_query(analyzed_tree);
+	}
 	else
-		tlist = pg_analyze_and_rewrite_fixedparams(rawtree,
-												   plansource->query_string,
-												   plansource->param_types,
-												   plansource->num_params,
-												   queryEnv);
+	{
+		/* Empty query, nothing to do */
+		tlist = NIL;
+	}
 
 	/* Release snapshot if we got one */
 	if (snapshot_set)
@@ -963,8 +1045,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
 	 */
 	snapshot_set = false;
 	if (!ActiveSnapshotSet() &&
-		plansource->raw_parse_tree &&
-		analyze_requires_snapshot(plansource->raw_parse_tree))
+		BuildingPlanRequiresSnapshot(plansource))
 	{
 		PushActiveSnapshot(GetTransactionSnapshot());
 		snapshot_set = true;
@@ -1703,6 +1784,7 @@ CopyCachedPlan(CachedPlanSource *plansource)
 	newsource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource));
 	newsource->magic = CACHEDPLANSOURCE_MAGIC;
 	newsource->raw_parse_tree = copyObject(plansource->raw_parse_tree);
+	newsource->analyzed_parse_tree = copyObject(plansource->analyzed_parse_tree);
 	newsource->query_string = pstrdup(plansource->query_string);
 	MemoryContextSetIdentifier(source_context, newsource->query_string);
 	newsource->commandTag = plansource->commandTag;
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index f1bd18c49f2..f29ed03b476 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -52,6 +52,7 @@ extern Query *transformStmt(ParseState *pstate, Node *parseTree);
 
 extern bool stmt_requires_parse_analysis(RawStmt *parseTree);
 extern bool analyze_requires_snapshot(RawStmt *parseTree);
+extern bool query_requires_rewrite_plan(Query *query);
 
 extern const char *LCS_asString(LockClauseStrength strength);
 extern void CheckSelectLocking(Query *qry, LockClauseStrength strength);
diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h
index 199cc323a28..5930fcb50f0 100644
--- a/src/include/utils/plancache.h
+++ b/src/include/utils/plancache.h
@@ -25,7 +25,8 @@
 #include "utils/resowner.h"
 
 
-/* Forward declaration, to avoid including parsenodes.h here */
+/* Forward declarations, to avoid including parsenodes.h here */
+struct Query;
 struct RawStmt;
 
 /* possible values for plan_cache_mode */
@@ -45,12 +46,22 @@ extern PGDLLIMPORT int plan_cache_mode;
 
 /*
  * CachedPlanSource (which might better have been called CachedQuery)
- * represents a SQL query that we expect to use multiple times.  It stores
- * the query source text, the raw parse tree, and the analyzed-and-rewritten
+ * represents a SQL query that we expect to use multiple times.  It stores the
+ * query source text, the source parse tree, and the analyzed-and-rewritten
  * query tree, as well as adjunct data.  Cache invalidation can happen as a
  * result of DDL affecting objects used by the query.  In that case we discard
  * the analyzed-and-rewritten query tree, and rebuild it when next needed.
  *
+ * There are two ways in which the source query can be represented: either
+ * as a raw parse tree, or as an analyzed-but-not-rewritten parse tree.
+ * In the latter case we expect that cache invalidation need not affect
+ * the parse-analysis results, only the rewriting and planning steps.
+ * Only one of raw_parse_tree and analyzed_parse_tree can be non-NULL.
+ * (If both are NULL, the CachedPlanSource represents an empty query.)
+ * Note that query_string is typically just an empty string when the
+ * source query is an analyzed parse tree; also, param_types, num_params,
+ * parserSetup, and parserSetupArg will not be used.
+ *
  * An actual execution plan, represented by CachedPlan, is derived from the
  * CachedPlanSource when we need to execute the query.  The plan could be
  * either generic (usable with any set of plan parameters) or custom (for a
@@ -78,7 +89,7 @@ extern PGDLLIMPORT int plan_cache_mode;
  * though it may be useful if the CachedPlan can be discarded early.)
  *
  * A CachedPlanSource has two associated memory contexts: one that holds the
- * struct itself, the query source text and the raw parse tree, and another
+ * struct itself, the query source text and the source parse tree, and another
  * context that holds the rewritten query tree and associated data.  This
  * allows the query tree to be discarded easily when it is invalidated.
  *
@@ -94,6 +105,7 @@ typedef struct CachedPlanSource
 {
 	int			magic;			/* should equal CACHEDPLANSOURCE_MAGIC */
 	struct RawStmt *raw_parse_tree; /* output of raw_parser(), or NULL */
+	struct Query *analyzed_parse_tree;	/* analyzed parse tree, or NULL */
 	const char *query_string;	/* source text of query */
 	CommandTag	commandTag;		/* command tag for query */
 	Oid		   *param_types;	/* array of parameter type OIDs, or NULL */
@@ -196,6 +208,9 @@ extern void ReleaseAllPlanCacheRefsInOwner(ResourceOwner owner);
 extern CachedPlanSource *CreateCachedPlan(struct RawStmt *raw_parse_tree,
 										  const char *query_string,
 										  CommandTag commandTag);
+extern CachedPlanSource *CreateCachedPlanForQuery(struct Query *analyzed_parse_tree,
+												  const char *query_string,
+												  CommandTag commandTag);
 extern CachedPlanSource *CreateOneShotCachedPlan(struct RawStmt *raw_parse_tree,
 												 const char *query_string,
 												 CommandTag commandTag);
-- 
2.43.5

