Improve cursor handling in pg_stat_statements

Started by Imseih (AWS), Samiover 3 years ago1 messages
#1Imseih (AWS), Sami
simseih@amazon.com
1 attachment(s)

Hi Hackers,

A recent observation in the way pg_stat_statements
handles CURSORS appears to be undesirable. This
was also recently brought up by Fujii Masao in the thread [1]/messages/by-id/37d32e91-4a08-afaf-a3a8-fd0578e4db50@oss.nttdata.com.

The findings are:
1. DECLARE CURSOR statements are not normalized.
2. The statistics are aggregated on the FETCH
statement.
3. Planning time is not tracked for DECLARE CURSOR
statements.

For #1, the concern is for applications that heavily
use cursors could end up seeing heavy pgss thrashing if
the query parameter change often.

For #2, since the FETCH statement only deals with a
cursor name, similar cursor names with different
underlying cursor statements will be lumped into
the same entry, which is absolutely incorrect from a
statistics perspective. Even if the same cursor name
is always for the same underlying statement, the pgss
user has to do extra parsing work to figure out which
underlying SQL statement is for the cursor.

For #3, planning time for cursors not being considered is
because pgss always sets queryId to 0 for utility statements,
and pgss_planner is taught to ignore queryId's = 0. This should
not be the case if the UTILITY statement has an underlying
optimizable statement.

I have attached v01-improve-cursor-tracking-in-pg_stat_statements.patch
which does the following:

## without the patch

postgres=# begin;
BEGIN
postgres=*# declare c1 cursor for select * from foo where id = 1;
DECLARE CURSOR
postgres=*# fetch c1; close c1;
id
----
1
(1 row)

CLOSE CURSOR
postgres=*# declare c1 cursor for select * from foo where id = 2;
DECLARE CURSOR
postgres=*# fetch c1; close c1;
id
----
2
(1 row)

CLOSE CURSOR
postgres=*# select query, calls from pg_stat_statements where query like '%c1%';
query | calls | plans
----------------------------------------------------------------------+--------+-------
declare c1 cursor for select * from foo where id = 1 | 1 | 0
declare c1 cursor for select * from foo where id = 2 | 1 | 0
close c1 | 2 | 0
fetch c1 | 2 | 0
(4 rows)

### with the patch

postgres=# begin;
BEGIN
postgres=*# declare c1 cursor for select * from foo where id = 1;
DECLARE CURSOR
postgres=*# fetch c1;
id
----
1
(1 row)

postgres=*# close c1;
CLOSE CURSOR
postgres=*# declare c1 cursor for select * from foo where id = 1;
DECLARE CURSOR
postgres=*# fetch c1;
id
----
1
(1 row)

postgres=*# close c1;
CLOSE CURSOR
postgres=*# select query, calls from pg_stat_statements where query like '%c1%';
postgres=*# select query, calls, plans from pg_stat_statements where query like '%c1%';
query | calls | plans
------------------------------------------------------------------------+------+-------
declare c1 cursor for select * from foo where id = $1 | 2 | 2
(1 row)

We can see that:
A. without the patch, planning stats were not considered,
but with the patch they are.
B. With the patch, the queries are normalized,
reducing the # of entries.
C. With the patch, the cursor stats are tracked by the top
level DECLARE CURSOR statement instead of the FETCH. This
means all FETCHes to the same cursor, regardless of the
FETCH options, will be tracked together. This to me is
more reasonable than lumping all FETCH stats for a cursor,
even if the cursors underlying statement is different.
D. The CLOSE <cursor> statement is also not tracked any
longer with the patch.
E. For queryId Jumbling, an underlying statement will not have
the same queryId as a similar statement that is not
run in a cursor. i.e. "select * from tab" will have a differet
queryId from "declare cursor c1 for select * from tab"

Feedback on this patch will be appreciated.

Regards,

[1]: /messages/by-id/37d32e91-4a08-afaf-a3a8-fd0578e4db50@oss.nttdata.com

--
Sami Imseih
Amazon Web Services (AWS)

Attachments:

v01-improve-cursor-tracking-in-pg_stat_statements.patchapplication/octet-stream; name=v01-improve-cursor-tracking-in-pg_stat_statements.patchDownload
From 06400764c63b14bc5d5811c4eda230f0d850bfe6 Mon Sep 17 00:00:00 2001
From: "Imseih (AWS)" <simseih@88665a22795f.ant.amazon.com>
Date: Mon, 12 Sep 2022 12:42:16 -0500
Subject: [PATCH 1/1] improve cursor tracking in pg_stat_statements

---
 .../expected/pg_stat_statements.out           |  9 +--
 .../pg_stat_statements/pg_stat_statements.c   | 76 +++++++++++++++++--
 .../sql/pg_stat_statements.sql                |  2 +-
 src/backend/commands/portalcmds.c             |  8 ++
 src/backend/executor/execMain.c               |  8 ++
 src/backend/tcop/pquery.c                     |  2 +
 src/backend/tcop/utility.c                    | 17 ++++-
 src/backend/utils/misc/queryjumble.c          | 69 +++++++++++------
 src/include/executor/execdesc.h               |  3 +
 9 files changed, 154 insertions(+), 40 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index ff0166f..7d9dfc1 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -570,7 +570,7 @@ COPY pgss_ctas (a, b) FROM STDIN;
 CREATE MATERIALIZED VIEW pgss_matv AS SELECT * FROM pgss_ctas;
 REFRESH MATERIALIZED VIEW pgss_matv;
 BEGIN;
-DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv;
+DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv WHERE a BETWEEN 1 AND 13;
 FETCH NEXT pgss_cursor;
  a |  b   
 ---+------
@@ -608,15 +608,12 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE
  COPY pgss_ctas (a, b) FROM STDIN                                                    |     0 |     1 |    3
  CREATE MATERIALIZED VIEW pgss_matv AS SELECT * FROM pgss_ctas                       |     0 |     1 |   13
  CREATE TABLE pgss_ctas AS SELECT a, 'ctas' b FROM generate_series(1, 10) a          |     0 |     1 |   10
- DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv                              |     0 |     1 |    0
- FETCH FORWARD 5 pgss_cursor                                                         |     0 |     1 |    5
- FETCH FORWARD ALL pgss_cursor                                                       |     0 |     1 |    7
- FETCH NEXT pgss_cursor                                                              |     0 |     1 |    1
+ DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv WHERE a BETWEEN $1 AND $2    |     1 |     3 |   13
  REFRESH MATERIALIZED VIEW pgss_matv                                                 |     0 |     1 |   13
  SELECT generate_series(1, 10) c INTO pgss_select_into                               |     0 |     1 |   10
  SELECT pg_stat_statements_reset()                                                   |     0 |     1 |    1
  SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" |     1 |     0 |    0
-(13 rows)
+(10 rows)
 
 --
 -- Track user activity and reset them
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ba868f0..cf8ccaf 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -105,7 +105,14 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
  */
 #define PGSS_HANDLED_UTILITY(n)		(!IsA(n, ExecuteStmt) && \
 									!IsA(n, PrepareStmt) && \
-									!IsA(n, DeallocateStmt))
+									!IsA(n, DeallocateStmt) && \
+									!IsA(n, FetchStmt) && \
+									!IsA(n, ClosePortalStmt))
+
+/*
+ * Check if a statement is a DECLARE CURSOR STATEMENT
+ */
+#define PGSS_IS_CURSOR(n) (IsA(n, DeclareCursorStmt))
 
 /*
  * Extension version number, for supporting older extension versions' objects
@@ -831,8 +838,12 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
 	 * Clear queryId for prepared statements related utility, as those will
 	 * inherit from the underlying statement's one (except DEALLOCATE which is
 	 * entirely untracked).
+	 *
+	 * Note that we do not want to clear the queryId of a DECLARE CURSOR utlity
+	 * statement, since we will rely on this queryId to track the underlying
+	 * cursor statement when a FETCH is called on the cursor.
 	 */
-	if (query->utilityStmt)
+	if (query->utilityStmt && !PGSS_IS_CURSOR(query->utilityStmt))
 	{
 		if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt))
 			query->queryId = UINT64CONST(0);
@@ -992,23 +1003,59 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
 }
 
 /*
- * ExecutorRun hook: all we need do is track nesting depth
+ * ExecutorRun hook: For all cases except a cursor statement,
+ * we want to track the nested depth.
+ * For cursor statements, we do not want to track the statement
+ * as a nested query, since we are not tracking the FETCH calls
+ * at all.
  */
 static void
 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
 				 bool execute_once)
 {
-	exec_nested_level++;
+	if (!queryDesc->is_cursor_statement)
+		exec_nested_level++;
+
+	if(queryDesc->plannedstmt->queryId != UINT64CONST(0) &&
+	   !queryDesc->totaltime && queryDesc->is_cursor_statement)
+	{
+		MemoryContext oldcxt;
+
+		oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+		queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
+		MemoryContextSwitchTo(oldcxt);
+	}
+
 	PG_TRY();
 	{
 		if (prev_ExecutorRun)
 			prev_ExecutorRun(queryDesc, direction, count, execute_once);
 		else
 			standard_ExecutorRun(queryDesc, direction, count, execute_once);
+
+		if(queryDesc->is_cursor_statement && queryDesc->totaltime)
+		{
+			InstrEndLoop(queryDesc->totaltime);
+
+			pgss_store(queryDesc->sourceText,
+					   queryDesc->plannedstmt->queryId,
+					   queryDesc->plannedstmt->stmt_location,
+					   queryDesc->plannedstmt->stmt_len,
+					   PGSS_EXEC,
+					   queryDesc->totaltime->total * 1000.0,	/* convert to msec */
+					   queryDesc->estate->es_processed,
+					   &queryDesc->totaltime->bufusage,
+					   &queryDesc->totaltime->walusage,
+					   queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
+					   NULL);
+
+			queryDesc->totaltime = NULL;
+		}
 	}
 	PG_FINALLY();
 	{
-		exec_nested_level--;
+		if (!queryDesc->is_cursor_statement)
+			exec_nested_level--;
 	}
 	PG_END_TRY();
 }
@@ -1086,7 +1133,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	/*
 	 * Force utility statements to get queryId zero.  We do this even in cases
 	 * where the statement contains an optimizable statement for which a
-	 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
+	 * queryId could be derived (such as EXPLAIN).  For such
 	 * cases, runtime control will first go through ProcessUtility and then
 	 * the executor, and we don't want the executor hooks to do anything,
 	 * since we are already measuring the statement's costs at the utility
@@ -1097,10 +1144,14 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	 * that user configured another extension to handle utility statements
 	 * only.
 	 */
-	if (pgss_enabled(exec_nested_level) && pgss_track_utility)
+	if (pgss_enabled(exec_nested_level) && pgss_track_utility &&
+		!PGSS_IS_CURSOR(pstmt->utilityStmt))
 		pstmt->queryId = UINT64CONST(0);
 
 	/*
+	 * Notes for PREPARED statements and CURSORS:
+	 *
+	 * PREPARED:
 	 * If it's an EXECUTE statement, we don't track it and don't increment the
 	 * nesting level.  This allows the cycles to be charged to the underlying
 	 * PREPARE instead (by the Executor hooks), which is much more useful.
@@ -1113,9 +1164,18 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	 * cases where planning time is not included at all.
 	 *
 	 * Likewise, we don't track execution of DEALLOCATE.
+	 *
+	 * CURSORS:
+	 * As the case with PREPARE and EXECUTE, we track a cursors underlying
+	 * statement when a FETCH is called or when track_planning is enabled.
+	 * The FETCH and CLOSE of a cursor are not tracked at all.
+	 *
+	 * Note that tracking does not differentiate between different FETCH
+	 * options from the same cursor, they all get lumped together.
 	 */
 	if (pgss_track_utility && pgss_enabled(exec_nested_level) &&
-		PGSS_HANDLED_UTILITY(parsetree))
+		PGSS_HANDLED_UTILITY(parsetree) &&
+		!PGSS_IS_CURSOR(pstmt->utilityStmt))
 	{
 		instr_time	start;
 		instr_time	duration;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index a01f183..7510b39 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -267,7 +267,7 @@ COPY pgss_ctas (a, b) FROM STDIN;
 CREATE MATERIALIZED VIEW pgss_matv AS SELECT * FROM pgss_ctas;
 REFRESH MATERIALIZED VIEW pgss_matv;
 BEGIN;
-DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv;
+DECLARE pgss_cursor CURSOR FOR SELECT * FROM pgss_matv WHERE a BETWEEN 1 AND 13;
 FETCH NEXT pgss_cursor;
 FETCH FORWARD 5 pgss_cursor;
 FETCH FORWARD ALL pgss_cursor;
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 9902c5c..599fbd7 100644
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -145,6 +145,14 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa
 	 */
 	PortalStart(portal, params, 0, GetActiveSnapshot());
 
+	/*
+	 * Now, set the queryId in the portals queryDesc
+	 */
+	portal->queryDesc->plannedstmt->queryId = query->queryId;
+	portal->queryDesc->plannedstmt->stmt_len = query->stmt_len;
+	portal->queryDesc->plannedstmt->stmt_location = query->stmt_location;
+	portal->queryDesc->is_cursor_statement = true;
+
 	Assert(portal->strategy == PORTAL_ONE_SELECT);
 
 	/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index ef2fd46..d58f2a8 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -301,6 +301,14 @@ ExecutorRun(QueryDesc *queryDesc,
 			ScanDirection direction, uint64 count,
 			bool execute_once)
 {
+	/*
+	 * To report the queryId of the query being worked on by a FETCH
+	 * command, this can only be done during ExecuteRun.
+	 * As in the case with ExecuteStart, it's harmless to report the
+	 * query_id multiple times.
+	 */
+	pgstat_report_query_id(queryDesc->plannedstmt->queryId, false);
+
 	if (ExecutorRun_hook)
 		(*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
 	else
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 5aa5a35..cee5409 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -95,6 +95,8 @@ CreateQueryDesc(PlannedStmt *plannedstmt,
 	/* not yet executed */
 	qd->already_executed = false;
 
+	qd->is_cursor_statement = false;
+
 	return qd;
 }
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index aa00815..e8dc842 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -693,8 +693,21 @@ standard_ProcessUtility(PlannedStmt *pstmt,
 			 * Portal (cursor) manipulation
 			 */
 		case T_DeclareCursorStmt:
-			PerformCursorOpen(pstate, (DeclareCursorStmt *) parsetree, params,
-							  isTopLevel);
+			{
+				/*
+				 * Set the cursor statement to the queryId
+				 * jumbled for the Declare Cursor Statement.
+				 */
+				DeclareCursorStmt *cstmt = (DeclareCursorStmt *) parsetree;
+				Query *query = (Query *) cstmt->query;
+
+				query->queryId = pstmt->queryId;
+				query->stmt_len = pstmt->stmt_len;
+				query->stmt_location = pstmt->stmt_location;
+
+				PerformCursorOpen(pstate, (DeclareCursorStmt *) parsetree, params,
+								  isTopLevel);
+			}
 			break;
 
 		case T_ClosePortalStmt:
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a67487e..ec4a670 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -104,7 +104,15 @@ JumbleQuery(Query *query, const char *querytext)
 
 	Assert(IsQueryIdEnabled());
 
-	if (query->utilityStmt)
+	/*
+	 * With the expection of Declare Cursor statements, all utility
+	 * statements are jumbled purely on textual comparison via
+	 * comppute_utility_query_id.
+	 *
+	 * All other statements are Jumbled on selected fields
+	 * from the Query parse tree via JumbleQueryInternal.
+	 */
+	if (query->utilityStmt && !IsA(query->utilityStmt, DeclareCursorStmt))
 	{
 		query->queryId = compute_utility_query_id(querytext,
 												  query->stmt_location,
@@ -240,28 +248,43 @@ static void
 JumbleQueryInternal(JumbleState *jstate, Query *query)
 {
 	Assert(IsA(query, Query));
-	Assert(query->utilityStmt == NULL);
-
-	APP_JUMB(query->commandType);
-	/* resultRelation is usually predictable from commandType */
-	JumbleExpr(jstate, (Node *) query->cteList);
-	JumbleRangeTable(jstate, query->rtable);
-	JumbleExpr(jstate, (Node *) query->jointree);
-	JumbleExpr(jstate, (Node *) query->targetList);
-	JumbleExpr(jstate, (Node *) query->onConflict);
-	JumbleExpr(jstate, (Node *) query->returningList);
-	JumbleExpr(jstate, (Node *) query->groupClause);
-	APP_JUMB(query->groupDistinct);
-	JumbleExpr(jstate, (Node *) query->groupingSets);
-	JumbleExpr(jstate, query->havingQual);
-	JumbleExpr(jstate, (Node *) query->windowClause);
-	JumbleExpr(jstate, (Node *) query->distinctClause);
-	JumbleExpr(jstate, (Node *) query->sortClause);
-	JumbleExpr(jstate, query->limitOffset);
-	JumbleExpr(jstate, query->limitCount);
-	APP_JUMB(query->limitOption);
-	JumbleRowMarks(jstate, query->rowMarks);
-	JumbleExpr(jstate, query->setOperations);
+	Assert(query->utilityStmt == NULL || IsA(query->utilityStmt, DeclareCursorStmt));
+
+	/*
+	 * For a Declare Cursor statement, we only jumble the query portion of the
+	 * statement. We also jumble the type of the node to differentiate between
+	 * similar queries called from a Declare cursor vs standalone.
+	 */
+	if (query->utilityStmt)
+	{
+		DeclareCursorStmt *dcs = (DeclareCursorStmt *) query->utilityStmt;
+		APP_JUMB(dcs->type);
+		JumbleQueryInternal(jstate, (Query *) dcs->query);
+	}
+	else
+	{
+		APP_JUMB(query->commandType);
+		/* resultRelation is usually predictable from commandType */
+		JumbleExpr(jstate, (Node *) query->cteList);
+
+		JumbleRangeTable(jstate, query->rtable);
+		JumbleExpr(jstate, (Node *) query->jointree);
+		JumbleExpr(jstate, (Node *) query->targetList);
+		JumbleExpr(jstate, (Node *) query->onConflict);
+		JumbleExpr(jstate, (Node *) query->returningList);
+		JumbleExpr(jstate, (Node *) query->groupClause);
+		APP_JUMB(query->groupDistinct);
+		JumbleExpr(jstate, (Node *) query->groupingSets);
+		JumbleExpr(jstate, query->havingQual);
+		JumbleExpr(jstate, (Node *) query->windowClause);
+		JumbleExpr(jstate, (Node *) query->distinctClause);
+		JumbleExpr(jstate, (Node *) query->sortClause);
+		JumbleExpr(jstate, query->limitOffset);
+		JumbleExpr(jstate, query->limitCount);
+		APP_JUMB(query->limitOption);
+		JumbleRowMarks(jstate, query->rowMarks);
+		JumbleExpr(jstate, query->setOperations);
+	}
 }
 
 /*
diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h
index e79e2c0..89fd7ba 100644
--- a/src/include/executor/execdesc.h
+++ b/src/include/executor/execdesc.h
@@ -53,6 +53,9 @@ typedef struct QueryDesc
 
 	/* This is always set NULL by the core system, but plugins can change it */
 	struct Instrumentation *totaltime;	/* total time spent in ExecutorRun */
+
+	/* True is this queryDesc is a Cursors statement */
+	bool   is_cursor_statement;
 } QueryDesc;
 
 /* in pquery.c */
-- 
2.32.1 (Apple Git-133)