From 601b81a4bdce056e7533521072f9a14e7257e541 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Mon, 22 Mar 2021 17:43:23 -0400
Subject: [PATCH] qid-02-display_over_qid-01-jumble squash commit

---
 .../pg_stat_statements/pg_stat_statements.c   | 112 +++++++-----------
 doc/src/sgml/config.sgml                      |  29 +++--
 doc/src/sgml/monitoring.sgml                  |  16 +++
 src/backend/catalog/system_views.sql          |   1 +
 src/backend/executor/execMain.c               |   9 ++
 src/backend/executor/execParallel.c           |  14 ++-
 src/backend/executor/nodeGather.c             |   3 +-
 src/backend/executor/nodeGatherMerge.c        |   4 +-
 src/backend/parser/analyze.c                  |   5 +
 src/backend/postmaster/pgstat.c               |  65 ++++++++++
 src/backend/tcop/postgres.c                   |   5 +
 src/backend/utils/adt/pgstatfuncs.c           |   7 +-
 src/backend/utils/error/elog.c                |   9 +-
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/backend/utils/misc/queryjumble.c          |  29 +++--
 src/include/catalog/pg_proc.dat               |   6 +-
 src/include/executor/execParallel.h           |   3 +-
 src/include/pgstat.h                          |   5 +
 src/include/utils/queryjumble.h               |   2 +-
 src/test/regress/expected/rules.out           |   9 +-
 20 files changed, 224 insertions(+), 110 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bd8c96728c..f62b9a2bfd 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -65,6 +65,7 @@
 #include "tcop/utility.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/queryjumble.h"
 #include "utils/memutils.h"
 #include "utils/timestamp.h"
 
@@ -99,6 +100,14 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
 #define USAGE_DEALLOC_PERCENT	5	/* free this % of entries at once */
 #define IS_STICKY(c)	((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
+/*
+ * Utility statements that pgss_ProcessUtility and pgss_post_parse_analyze
+ * ignores.
+ */
+#define PGSS_HANDLED_UTILITY(n)		(!IsA(n, ExecuteStmt) && \
+									!IsA(n, PrepareStmt) && \
+									!IsA(n, DeallocateStmt))
+
 /*
  * Extension version number, for supporting older extension versions' objects
  */
@@ -307,7 +316,6 @@ static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 								ProcessUtilityContext context, ParamListInfo params,
 								QueryEnvironment *queryEnv,
 								DestReceiver *dest, QueryCompletion *qc);
-static uint64 pgss_hash_string(const char *str, int len);
 static void pgss_store(const char *query, uint64 queryId,
 					   int query_location, int query_len,
 					   pgssStoreKind kind,
@@ -804,16 +812,14 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
 		return;
 
 	/*
-	 * Utility statements get queryId zero.  We do this even in cases where
-	 * the statement contains an optimizable statement for which a queryId
-	 * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
-	 * runtime control will first go through ProcessUtility and then the
-	 * executor, and we don't want the executor hooks to do anything, since we
-	 * are already measuring the statement's costs at the utility level.
+	 * Clear queryId for prepared statements related utility, as those will
+	 * inherit from the underlying statement's one (except DEALLOCATE which is
+	 * entirely untracked).
 	 */
 	if (query->utilityStmt)
 	{
-		query->queryId = UINT64CONST(0);
+		if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt))
+			query->queryId = UINT64CONST(0);
 		return;
 	}
 
@@ -1055,6 +1061,23 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 					DestReceiver *dest, QueryCompletion *qc)
 {
 	Node	   *parsetree = pstmt->utilityStmt;
+	uint64		saved_queryId = pstmt->queryId;
+
+	/*
+	 * Force utility statements to get queryId zero.  We do this even in cases
+	 * where the statement contains an optimizable statement for which a
+	 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
+	 * cases, runtime control will first go through ProcessUtility and then the
+	 * executor, and we don't want the executor hooks to do anything, since we
+	 * are already measuring the statement's costs at the utility level.
+	 *
+	 * Note that this is only done if pg_stat_statements is enabled and
+	 * configured to track utility statements, in the unlikely possibility
+	 * that user configured another extension to handle utility statements
+	 * only.
+	 */
+	if (pgss_enabled(exec_nested_level) && pgss_track_utility)
+		pstmt->queryId = UINT64CONST(0);
 
 	/*
 	 * If it's an EXECUTE statement, we don't track it and don't increment the
@@ -1071,9 +1094,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	 * Likewise, we don't track execution of DEALLOCATE.
 	 */
 	if (pgss_track_utility && pgss_enabled(exec_nested_level) &&
-		!IsA(parsetree, ExecuteStmt) &&
-		!IsA(parsetree, PrepareStmt) &&
-		!IsA(parsetree, DeallocateStmt))
+		PGSS_HANDLED_UTILITY(parsetree))
 	{
 		instr_time	start;
 		instr_time	duration;
@@ -1128,7 +1149,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 		WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
 
 		pgss_store(queryString,
-				   0,			/* signal that it's a utility stmt */
+				   saved_queryId,
 				   pstmt->stmt_location,
 				   pstmt->stmt_len,
 				   PGSS_EXEC,
@@ -1151,23 +1172,12 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	}
 }
 
-/*
- * Given an arbitrarily long query string, produce a hash for the purposes of
- * identifying the query, without normalizing constants.  Used when hashing
- * utility statements.
- */
-static uint64
-pgss_hash_string(const char *str, int len)
-{
-	return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
-											len, 0));
-}
-
 /*
  * Store some statistics for a statement.
  *
- * If queryId is 0 then this is a utility statement and we should compute
- * a suitable queryId internally.
+ * If queryId is 0 then this is a utility statement for which we couldn't
+ * compute a queryId during parse analysis, and we should compute a suitable
+ * queryId internally.
  *
  * If jstate is not NULL then we're trying to create an entry for which
  * we have no statistics as yet; we just want to record the normalized
@@ -1198,52 +1208,18 @@ pgss_store(const char *query, uint64 queryId,
 		return;
 
 	/*
-	 * Confine our attention to the relevant part of the string, if the query
-	 * is a portion of a multi-statement source string.
-	 *
-	 * First apply starting offset, unless it's -1 (unknown).
-	 */
-	if (query_location >= 0)
-	{
-		Assert(query_location <= strlen(query));
-		query += query_location;
-		/* Length of 0 (or -1) means "rest of string" */
-		if (query_len <= 0)
-			query_len = strlen(query);
-		else
-			Assert(query_len <= strlen(query));
-	}
-	else
-	{
-		/* If query location is unknown, distrust query_len as well */
-		query_location = 0;
-		query_len = strlen(query);
-	}
-
-	/*
-	 * Discard leading and trailing whitespace, too.  Use scanner_isspace()
-	 * not libc's isspace(), because we want to match the lexer's behavior.
+	 * Nothing to do if compute_query_id isn't enabled and no other module
+	 * computed a query identifier.
 	 */
-	while (query_len > 0 && scanner_isspace(query[0]))
-		query++, query_location++, query_len--;
-	while (query_len > 0 && scanner_isspace(query[query_len - 1]))
-		query_len--;
+	if (queryId == UINT64CONST(0))
+		return;
 
 	/*
-	 * For utility statements, we just hash the query string to get an ID.
+	 * Confine our attention to the relevant part of the string, if the query
+	 * is a portion of a multi-statement source string, and update query
+	 * location and length if needed.
 	 */
-	if (queryId == UINT64CONST(0))
-	{
-		queryId = pgss_hash_string(query, query_len);
-
-		/*
-		 * If we are unlucky enough to get a hash of zero(invalid), use
-		 * queryID as 2 instead, queryID 1 is already in use for normal
-		 * statements.
-		 */
-		if (queryId == UINT64CONST(0))
-			queryId = UINT64CONST(2);
-	}
+	query = CleanQuerytext(query, &query_location, &query_len);
 
 	/* Set up key for hashtable search */
 	key.userid = GetUserId();
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 89f7daf11f..a3034beddc 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6943,6 +6943,15 @@ local0.*    /var/log/postgresql
              session processes</entry>
              <entry>no</entry>
             </row>
+            <row>
+             <entry><literal>%Q</literal></entry>
+             <entry>query identifier of the current query.  Query
+             identifiers are not computed by default, so this field
+             will be zero unless <xref linkend="guc-compute-query-id"/>
+             parameter is enabled or a third-party module that computes
+             query identifiers is configured.</entry>
+             <entry>yes</entry>
+            </row>
             <row>
              <entry><literal>%%</literal></entry>
              <entry>Literal <literal>%</literal></entry>
@@ -7419,8 +7428,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       <listitem>
        <para>
         Enables the collection of information on the currently
-        executing command of each session, along with the time when
-        that command began execution. This parameter is on by
+        executing command of each session, along with its identifier and the
+        time when that command began execution. This parameter is on by
         default. Note that even when enabled, this information is not
         visible to all users, only to superusers and the user owning
         the session being reported on, so it should not represent a
@@ -7569,12 +7578,16 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </term>
       <listitem>
        <para>
-        Enables in-core computation of a query identifier.  The <xref
-        linkend="pgstatstatements"/> extension requires a query identifier
-        to be computed.  Note that an external module can alternatively
-        be used if the in-core query identifier computation method
-        isn't acceptable.  In this case, in-core computation should
-        remain disabled.  The default is <literal>off</literal>.
+        Enables in-core computation of a query identifier.
+        Query identifiers can be displayed in the <link
+        linkend="monitoring-pg-stat-activity-view"><structname>pg_stat_activity</structname></link>
+        view, or emitted in the log if configured via the <xref
+        linkend="guc-log-line-prefix"/> parameter.  The <xref
+        linkend="pgstatstatements"/> extension also requires a query
+        identifier to be computed.  Note that an external module can
+        alternatively be used if the in-core query identifier computation
+        specification isn't acceptable.  In this case, in-core computation
+        must be disabled.  The default is <literal>off</literal>.
        </para>
        <note>
         <para>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index db4b4e460c..c2ef473cc5 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -910,6 +910,22 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </para></entry>
      </row>
 
+    <row>
+     <entry role="catalog_table_entry"><para role="column_definition">
+      <structfield>queryid</structfield> <type>bigint</type>
+     </para>
+     <para>
+      Identifier of this backend's most recent query. If
+      <structfield>state</structfield> is <literal>active</literal> this
+      field shows the identifier of the currently executing query. In
+      all other states, it shows the identifier of last query that was
+      executed.  Query identifiers are not computed by default so this
+      field will be null unless <xref linkend="guc-compute-query-id"/>
+      parameter is enabled or a third-party module that computes query
+      identifiers is configured.
+     </para></entry>
+    </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>query</structfield> <type>text</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0dca65dc7b..012d86217f 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -764,6 +764,7 @@ CREATE VIEW pg_stat_activity AS
             S.state,
             S.backend_xid,
             s.backend_xmin,
+            S.queryid,
             S.query,
             S.backend_type
     FROM pg_stat_get_activity(NULL) AS S
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 0648dd82ba..2d1c7690cb 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -54,6 +54,7 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "parser/parsetree.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "tcop/utility.h"
@@ -128,6 +129,14 @@ static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree);
 void
 ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
+	/*
+	 * In some cases (e.g. an EXECUTE statement) a query execution will skip
+	 * parse analysis, which means that the queryid won't be reported.  Note
+	 * that it's harmless to report the queryid multiple time, as the call will
+	 * be ignored if the top level queryid has already been reported.
+	 */
+	pgstat_report_queryid(queryDesc->plannedstmt->queryId, false);
+
 	if (ExecutorStart_hook)
 		(*ExecutorStart_hook) (queryDesc, eflags);
 	else
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index c95d5170e4..26f1994a31 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -124,7 +124,7 @@ typedef struct ExecParallelInitializeDSMContext
 } ExecParallelInitializeDSMContext;
 
 /* Helper functions that run in the parallel leader. */
-static char *ExecSerializePlan(Plan *plan, EState *estate);
+static char *ExecSerializePlan(Plan *plan, EState *estate, uint64 queryId);
 static bool ExecParallelEstimate(PlanState *node,
 								 ExecParallelEstimateContext *e);
 static bool ExecParallelInitializeDSM(PlanState *node,
@@ -143,7 +143,7 @@ static DestReceiver *ExecParallelGetReceiver(dsm_segment *seg, shm_toc *toc);
  * Create a serialized representation of the plan to be sent to each worker.
  */
 static char *
-ExecSerializePlan(Plan *plan, EState *estate)
+ExecSerializePlan(Plan *plan, EState *estate, uint64 queryId)
 {
 	PlannedStmt *pstmt;
 	ListCell   *lc;
@@ -174,7 +174,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
 	 */
 	pstmt = makeNode(PlannedStmt);
 	pstmt->commandType = CMD_SELECT;
-	pstmt->queryId = UINT64CONST(0);
+	pstmt->queryId = queryId;
 	pstmt->hasReturning = false;
 	pstmt->hasModifyingCTE = false;
 	pstmt->canSetTag = true;
@@ -578,7 +578,8 @@ ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize)
 ParallelExecutorInfo *
 ExecInitParallelPlan(PlanState *planstate, EState *estate,
 					 Bitmapset *sendParams, int nworkers,
-					 int64 tuples_needed)
+					 int64 tuples_needed,
+					 uint64 queryId)
 {
 	ParallelExecutorInfo *pei;
 	ParallelContext *pcxt;
@@ -620,7 +621,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
 	pei->planstate = planstate;
 
 	/* Fix up and serialize plan to be sent to workers. */
-	pstmt_data = ExecSerializePlan(planstate->plan, estate);
+	pstmt_data = ExecSerializePlan(planstate->plan, estate, queryId);
 
 	/* Create a parallel context. */
 	pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers);
@@ -1403,8 +1404,9 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	/* Setting debug_query_string for individual workers */
 	debug_query_string = queryDesc->sourceText;
 
-	/* Report workers' query for monitoring purposes */
+	/* Report workers' query and queryId for monitoring purposes */
 	pgstat_report_activity(STATE_RUNNING, debug_query_string);
+	pgstat_report_queryid(queryDesc->plannedstmt->queryId, false);
 
 	/* Attach to the dynamic shared memory area. */
 	area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);
diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index 9e1dc464cb..04c860f678 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -172,7 +172,8 @@ ExecGather(PlanState *pstate)
 												 estate,
 												 gather->initParam,
 												 gather->num_workers,
-												 node->tuples_needed);
+												 node->tuples_needed,
+												 pgstat_get_my_queryid());
 			else
 				ExecParallelReinitialize(node->ps.lefttree,
 										 node->pei,
diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c
index aa5743cebf..32f74e8c23 100644
--- a/src/backend/executor/nodeGatherMerge.c
+++ b/src/backend/executor/nodeGatherMerge.c
@@ -24,6 +24,7 @@
 #include "lib/binaryheap.h"
 #include "miscadmin.h"
 #include "optimizer/optimizer.h"
+#include "pgstat.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
@@ -216,7 +217,8 @@ ExecGatherMerge(PlanState *pstate)
 												 estate,
 												 gm->initParam,
 												 gm->num_workers,
-												 node->tuples_needed);
+												 node->tuples_needed,
+												 pgstat_get_my_queryid());
 			else
 				ExecParallelReinitialize(node->ps.lefttree,
 										 node->pei,
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index c565c80365..d125ef7f98 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -44,6 +44,7 @@
 #include "parser/parse_target.h"
 #include "parser/parse_type.h"
 #include "parser/parsetree.h"
+#include "pgstat.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
@@ -130,6 +131,8 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	return query;
 }
 
@@ -167,6 +170,8 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	return query;
 }
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 208a33692f..2419a2b003 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3381,6 +3381,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 			beentry->st_activity_start_timestamp = 0;
 			/* st_xact_start_timestamp and wait_event_info are also disabled */
 			beentry->st_xact_start_timestamp = 0;
+			beentry->st_queryid = 0;
 			proc->wait_event_info = 0;
 			PGSTAT_END_WRITE_ACTIVITY(beentry);
 		}
@@ -3435,6 +3436,14 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	beentry->st_state = state;
 	beentry->st_state_start_timestamp = current_timestamp;
 
+	/*
+	 * If a new query is started, we reset the query identifier as it'll only
+	 * be known after parse analysis, to avoid reporting last query's
+	 * identifier.
+	 */
+	if (state == STATE_RUNNING)
+		beentry->st_queryid = 0;
+
 	if (cmd_str != NULL)
 	{
 		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
@@ -3445,6 +3454,48 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/* --------
+ * pgstat_report_queryid() -
+ *
+ *	Called to update top-level query identifier.
+ * --------
+ */
+void
+pgstat_report_queryid(uint64 queryId, bool force)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry)
+		return;
+
+	/*
+	 * if track_activities is disabled, st_queryid should already have been
+	 * reset
+	 */
+	if (!pgstat_track_activities)
+		return;
+
+	/*
+	 * We only report the top-level query identifiers.  The stored queryid is
+	 * reset when a backend calls pgstat_report_activity(STATE_RUNNING), or
+	 * with an explicit call to this function using the force flag.  If the
+	 * saved query identifier is not zero it means that it's not a top-level
+	 * command, so ignore the one provided unless it's an explicit call to
+	 * reset the identifier.
+	 */
+	if (beentry->st_queryid != 0 && !force)
+		return;
+
+	/*
+	 * Update my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_queryid = queryId;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
 /*-----------
  * pgstat_progress_start_command() -
  *
@@ -5178,6 +5229,20 @@ pgstat_get_db_entry(Oid databaseid, bool create)
 	return result;
 }
 
+/* ----------
+ * pgstat_get_my_queryid() -
+ *
+ *	Return current backend's query identifier.
+ */
+uint64
+pgstat_get_my_queryid(void)
+{
+	if (!MyBEEntry)
+		return 0;
+
+	return MyBEEntry->st_queryid;
+}
+
 
 /*
  * Lookup the hash table entry for the specified table. If no hash
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7e034b72b1..d66cee79f0 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -692,6 +692,8 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	if (log_parser_stats)
 		ShowUsage("PARSE ANALYSIS STATISTICS");
 
@@ -910,6 +912,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions,
 			stmt->utilityStmt = query->utilityStmt;
 			stmt->stmt_location = query->stmt_location;
 			stmt->stmt_len = query->stmt_len;
+			stmt->queryId = query->queryId;
 		}
 		else
 		{
@@ -1026,6 +1029,8 @@ exec_simple_query(const char *query_string)
 		DestReceiver *receiver;
 		int16		format;
 
+		pgstat_report_queryid(0, true);
+
 		/*
 		 * Get the command name for use in status display (it also becomes the
 		 * default completion tag, down inside PortalRun).  Set ps_status and
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 5102227a60..8e81eef8cb 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -569,7 +569,7 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_activity(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_ACTIVITY_COLS	29
+#define PG_STAT_GET_ACTIVITY_COLS	30
 	int			num_backends = pgstat_fetch_stat_numbackends();
 	int			curr_backend;
 	int			pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0);
@@ -914,6 +914,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 				values[27] = BoolGetDatum(false);	/* GSS Encryption not in
 													 * use */
 			}
+			if (beentry->st_queryid == 0)
+				nulls[29] = true;
+			else
+				values[29] = DatumGetUInt64(beentry->st_queryid);
 		}
 		else
 		{
@@ -941,6 +945,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 			nulls[26] = true;
 			nulls[27] = true;
 			nulls[28] = true;
+			nulls[29] = true;
 		}
 
 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index e729ebece7..7aa484c5ed 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -77,7 +77,6 @@
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
-#include "storage/proc.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -2685,6 +2684,14 @@ log_line_prefix(StringInfo buf, ErrorData *edata)
 				else
 					appendStringInfoString(buf, unpack_sql_state(edata->sqlerrcode));
 				break;
+			case 'Q':
+				if (padding != 0)
+					appendStringInfo(buf, "%*ld", padding,
+							pgstat_get_my_queryid());
+				else
+					appendStringInfo(buf, "%ld",
+							pgstat_get_my_queryid());
+				break;
 			default:
 				/* format error - ignore it */
 				break;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 01493ed3d4..47d6e2019b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -542,6 +542,7 @@
 					#   %t = timestamp without milliseconds
 					#   %m = timestamp with milliseconds
 					#   %n = timestamp with milliseconds (as a Unix epoch)
+					#   %Q = query ID (0 if none or not computed)
 					#   %i = command tag
 					#   %e = SQL state
 					#   %c = session ID
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index ae84fcac6e..b0a5731ef7 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -39,7 +39,7 @@
 
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
-static uint64 compute_utility_queryid(const char *str, int query_len);
+static uint64 compute_utility_queryid(const char *str, int query_location, int query_len);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
 static void JumbleQueryInternal(JumbleState *jstate, Query *query);
@@ -53,7 +53,7 @@ static void RecordConstLocation(JumbleState *jstate, int location);
  * relevant part of the string.
  */
 const char *
-clean_querytext(const char *query, int *location, int *len)
+CleanQuerytext(const char *query, int *location, int *len)
 {
 	int query_location = *location;
 	int query_len = *len;
@@ -97,17 +97,9 @@ JumbleQuery(Query *query, const char *querytext)
 	JumbleState *jstate = NULL;
 	if (query->utilityStmt)
 	{
-		const char *sql;
-		int query_location = query->stmt_location;
-		int query_len = query->stmt_len;
-
-		/*
-		 * Confine our attention to the relevant part of the string, if the
-		 * query is a portion of a multi-statement source string.
-		 */
-		sql = clean_querytext(querytext, &query_location, &query_len);
-
-		query->queryId = compute_utility_queryid(sql, query_len);
+		query->queryId = compute_utility_queryid(querytext,
+												 query->stmt_location,
+												 query->stmt_len);
 	}
 	else
 	{
@@ -143,11 +135,18 @@ JumbleQuery(Query *query, const char *querytext)
  * Compute a query identifier for the given utility query string.
  */
 static uint64
-compute_utility_queryid(const char *str, int query_len)
+compute_utility_queryid(const char *query_text, int query_location, int query_len)
 {
 	uint64 queryId;
+	const char *sql;
+
+	/*
+	 * Confine our attention to the relevant part of the string, if the
+	 * query is a portion of a multi-statement source string.
+	 */
+	sql = CleanQuerytext(query_text, &query_location, &query_len);
 
-	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) str,
+	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql,
 											   query_len, 0));
 
 	/*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e259531f60..9550de0798 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5249,9 +5249,9 @@
   proname => 'pg_stat_get_activity', prorows => '100', proisstrict => 'f',
   proretset => 't', provolatile => 's', proparallel => 'r',
   prorettype => 'record', proargtypes => 'int4',
-  proallargtypes => '{int4,oid,int4,oid,text,text,text,text,text,timestamptz,timestamptz,timestamptz,timestamptz,inet,text,int4,xid,xid,text,bool,text,text,int4,text,numeric,text,bool,text,bool,int4}',
-  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc,leader_pid}',
+  proallargtypes => '{int4,oid,int4,oid,text,text,text,text,text,timestamptz,timestamptz,timestamptz,timestamptz,inet,text,int4,xid,xid,text,bool,text,text,int4,text,numeric,text,bool,text,bool,int4,int8}',
+  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc,leader_pid,queryid}',
   prosrc => 'pg_stat_get_activity' },
 { oid => '3318',
   descr => 'statistics: information about progress of backends running maintenance command',
diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h
index 3888175a2f..e0e08e0b27 100644
--- a/src/include/executor/execParallel.h
+++ b/src/include/executor/execParallel.h
@@ -39,7 +39,8 @@ typedef struct ParallelExecutorInfo
 
 extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate,
 												  EState *estate, Bitmapset *sendParam, int nworkers,
-												  int64 tuples_needed);
+												  int64 tuples_needed,
+												  uint64 queryId);
 extern void ExecParallelCreateReaders(ParallelExecutorInfo *pei);
 extern void ExecParallelFinish(ParallelExecutorInfo *pei);
 extern void ExecParallelCleanup(ParallelExecutorInfo *pei);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index be43c04802..09d36a1e23 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1263,6 +1263,9 @@ typedef struct PgBackendStatus
 	ProgressCommandType st_progress_command;
 	Oid			st_progress_command_target;
 	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
+
+	/* query identifier, optionally computed using post_parse_analyze_hook */
+	uint64		st_queryid;
 } PgBackendStatus;
 
 /*
@@ -1457,6 +1460,7 @@ extern void pgstat_initialize(void);
 extern void pgstat_bestart(void);
 
 extern void pgstat_report_activity(BackendState state, const char *cmd_str);
+extern void pgstat_report_queryid(uint64 queryId, bool force);
 extern void pgstat_report_tempfile(size_t filesize);
 extern void pgstat_report_appname(const char *appname);
 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
@@ -1465,6 +1469,7 @@ extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
 extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
 													   int buflen);
+extern uint64 pgstat_get_my_queryid(void);
 
 extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
 										  Oid relid);
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 14087eea43..520cd4f43e 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -52,7 +52,7 @@ typedef struct JumbleState
 	int			highest_extern_param_id;
 } JumbleState;
 
-const char *clean_querytext(const char *query, int *location, int *len);
+const char *CleanQuerytext(const char *query, int *location, int *len);
 JumbleState *JumbleQuery(Query *query, const char *querytext);
 
 #endif							/* QUERYJUMBLE_H */
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 9b12cc122a..ff3506d5d7 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1762,9 +1762,10 @@ pg_stat_activity| SELECT s.datid,
     s.state,
     s.backend_xid,
     s.backend_xmin,
+    s.queryid,
     s.query,
     s.backend_type
-   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)))
      LEFT JOIN pg_authid u ON ((s.usesysid = u.oid)));
 pg_stat_all_indexes| SELECT c.oid AS relid,
@@ -1876,7 +1877,7 @@ pg_stat_gssapi| SELECT s.pid,
     s.gss_auth AS gss_authenticated,
     s.gss_princ AS principal,
     s.gss_enc AS encrypted
-   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
   WHERE (s.client_port IS NOT NULL);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
@@ -2046,7 +2047,7 @@ pg_stat_replication| SELECT s.pid,
     w.sync_priority,
     w.sync_state,
     w.reply_time
-   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
      JOIN pg_stat_get_wal_senders() w(pid, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, write_lag, flush_lag, replay_lag, sync_priority, sync_state, reply_time) ON ((s.pid = w.pid)))
      LEFT JOIN pg_authid u ON ((s.usesysid = u.oid)));
 pg_stat_replication_slots| SELECT s.slot_name,
@@ -2076,7 +2077,7 @@ pg_stat_ssl| SELECT s.pid,
     s.ssl_client_dn AS client_dn,
     s.ssl_client_serial AS client_serial,
     s.ssl_issuer_dn AS issuer_dn
-   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
   WHERE (s.client_port IS NOT NULL);
 pg_stat_subscription| SELECT su.oid AS subid,
     su.subname,
-- 
2.20.1

