From fac0cc4973ff5a2350b7e26c53291b11a7fbb1be Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 11 Mar 2025 15:47:52 +0900
Subject: [PATCH] Add node offsets in query jumbling computations

---
 src/backend/nodes/gen_node_support.pl         |  6 +-
 src/backend/nodes/queryjumblefuncs.c          | 73 ++++++++++------
 .../pg_stat_statements/expected/select.out    | 87 ++++++++++++++++++-
 contrib/pg_stat_statements/sql/select.sql     | 20 +++++
 4 files changed, 157 insertions(+), 29 deletions(-)

diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0aea..bb4168aeac55 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1301,7 +1301,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 		if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
 			and elem $1, @node_types)
 		{
-			print $jff "\tJUMBLE_NODE($f);\n"
+			print $jff "\tJUMBLE_NODE($f, offsetof($n, $f));\n"
 			  unless $query_jumble_ignore;
 		}
 		elsif ($t eq 'ParseLoc')
@@ -1315,12 +1315,12 @@ _jumble${n}(JumbleState *jstate, Node *node)
 		}
 		elsif ($t eq 'char*')
 		{
-			print $jff "\tJUMBLE_STRING($f);\n"
+			print $jff "\tJUMBLE_STRING($f, offsetof($n, $f));\n"
 			  unless $query_jumble_ignore;
 		}
 		else
 		{
-			print $jff "\tJUMBLE_FIELD($f);\n"
+			print $jff "\tJUMBLE_FIELD($f, offsetof($n, $f));\n"
 			  unless $query_jumble_ignore;
 		}
 	}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a2819366..11e43872753a 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -52,9 +52,9 @@ int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 bool		query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
-						 const unsigned char *item, Size size);
+						 const unsigned char *item, Size size, int seed);
 static void RecordConstLocation(JumbleState *jstate, int location);
-static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleNode(JumbleState *jstate, Node *node, int seed);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -127,7 +127,7 @@ JumbleQuery(Query *query)
 	jstate->highest_extern_param_id = 0;
 
 	/* Compute query ID and mark the Query node with it */
-	_jumbleNode(jstate, (Node *) query);
+	_jumbleNode(jstate, (Node *) query, 0);
 	query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
 													  jstate->jumble_len,
 													  0));
@@ -163,9 +163,13 @@ EnableQueryId(void)
 /*
  * AppendJumble: Append a value that is substantive in a given query to
  * the current jumble.
+ *
+ * The "seed" value would normally be the offset of a node member to force
+ * more entropy in the query ID generated.  There are exceptions in some of
+ * the custom query jumbling functions, like the List one.
  */
 static void
-AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
+AppendJumble(JumbleState *jstate, const unsigned char *item, Size size, int seed)
 {
 	unsigned char *jumble = jstate->jumble;
 	Size		jumble_len = jstate->jumble_len;
@@ -185,6 +189,8 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
 
 			start_hash = DatumGetUInt64(hash_any_extended(jumble,
 														  JUMBLE_SIZE, 0));
+			start_hash = hash_combine64(start_hash, seed);
+
 			memcpy(jumble, &start_hash, sizeof(start_hash));
 			jumble_len = sizeof(start_hash);
 		}
@@ -223,24 +229,24 @@ RecordConstLocation(JumbleState *jstate, int location)
 	}
 }
 
-#define JUMBLE_NODE(item) \
-	_jumbleNode(jstate, (Node *) expr->item)
+#define JUMBLE_NODE(item, seed) \
+	_jumbleNode(jstate, (Node *) expr->item, seed)
 #define JUMBLE_LOCATION(location) \
 	RecordConstLocation(jstate, expr->location)
-#define JUMBLE_FIELD(item) \
-	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
+#define JUMBLE_FIELD(item, seed) \
+	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item), seed)
 #define JUMBLE_FIELD_SINGLE(item) \
-	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
-#define JUMBLE_STRING(str) \
+	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item), 0)
+#define JUMBLE_STRING(str, seed) \
 do { \
 	if (expr->str) \
-		AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
+		AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1, seed); \
 } while(0)
 
 #include "queryjumblefuncs.funcs.c"
 
 static void
-_jumbleNode(JumbleState *jstate, Node *node)
+_jumbleNode(JumbleState *jstate, Node *node, int seed)
 {
 	Node	   *expr = node;
 
@@ -250,11 +256,13 @@ _jumbleNode(JumbleState *jstate, Node *node)
 	/* Guard against stack overflow due to overly complex expressions */
 	check_stack_depth();
 
+	JUMBLE_FIELD_SINGLE(seed);
+
 	/*
 	 * We always emit the node's NodeTag, then any additional fields that are
 	 * considered significant, and then we recurse to any child nodes.
 	 */
-	JUMBLE_FIELD(type);
+	JUMBLE_FIELD(type, offsetof(Node, type));
 
 	switch (nodeTag(expr))
 	{
@@ -295,29 +303,43 @@ _jumbleNode(JumbleState *jstate, Node *node)
 	}
 }
 
+/*
+ * Custom query jumbling function for a List node.  Note that the
+ * "seed" is defined as the item number in a list.
+ */
 static void
 _jumbleList(JumbleState *jstate, Node *node)
 {
 	List	   *expr = (List *) node;
 	ListCell   *l;
+	int			num = 0;
 
 	switch (expr->type)
 	{
 		case T_List:
 			foreach(l, expr)
-				_jumbleNode(jstate, lfirst(l));
+				_jumbleNode(jstate, lfirst(l), num);
 			break;
 		case T_IntList:
 			foreach(l, expr)
+			{
 				JUMBLE_FIELD_SINGLE(lfirst_int(l));
+				JUMBLE_FIELD_SINGLE(num);
+			}
 			break;
 		case T_OidList:
 			foreach(l, expr)
+			{
 				JUMBLE_FIELD_SINGLE(lfirst_oid(l));
+				JUMBLE_FIELD_SINGLE(num);
+			}
 			break;
 		case T_XidList:
 			foreach(l, expr)
+			{
 				JUMBLE_FIELD_SINGLE(lfirst_xid(l));
+				JUMBLE_FIELD_SINGLE(num);
+			}
 			break;
 		default:
 			elog(ERROR, "unrecognized list node type: %d",
@@ -331,26 +353,27 @@ _jumbleA_Const(JumbleState *jstate, Node *node)
 {
 	A_Const    *expr = (A_Const *) node;
 
-	JUMBLE_FIELD(isnull);
+	JUMBLE_FIELD(isnull, offsetof(A_Const, isnull));
+
 	if (!expr->isnull)
 	{
-		JUMBLE_FIELD(val.node.type);
+		JUMBLE_FIELD(val.node.type, offsetof(A_Const, val));
 		switch (nodeTag(&expr->val))
 		{
 			case T_Integer:
-				JUMBLE_FIELD(val.ival.ival);
+				JUMBLE_FIELD(val.ival.ival, T_Integer);
 				break;
 			case T_Float:
-				JUMBLE_STRING(val.fval.fval);
+				JUMBLE_STRING(val.fval.fval, T_Float);
 				break;
 			case T_Boolean:
-				JUMBLE_FIELD(val.boolval.boolval);
+				JUMBLE_FIELD(val.boolval.boolval, T_Boolean);
 				break;
 			case T_String:
-				JUMBLE_STRING(val.sval.sval);
+				JUMBLE_STRING(val.sval.sval, T_String);
 				break;
 			case T_BitString:
-				JUMBLE_STRING(val.bsval.bsval);
+				JUMBLE_STRING(val.bsval.bsval, T_BitString);
 				break;
 			default:
 				elog(ERROR, "unrecognized node type: %d",
@@ -365,15 +388,15 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
 {
 	VariableSetStmt *expr = (VariableSetStmt *) node;
 
-	JUMBLE_FIELD(kind);
-	JUMBLE_STRING(name);
+	JUMBLE_FIELD(kind, offsetof(VariableSetStmt, kind));
+	JUMBLE_STRING(name, offsetof(VariableSetStmt, name));
 
 	/*
 	 * Account for the list of arguments in query jumbling only if told by the
 	 * parser.
 	 */
 	if (expr->jumble_args)
-		JUMBLE_NODE(args);
-	JUMBLE_FIELD(is_local);
+		JUMBLE_NODE(args, offsetof(VariableSetStmt, args));
+	JUMBLE_FIELD(is_local, offsetof(VariableSetStmt, is_local));
 	JUMBLE_LOCATION(location);
 }
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034a6..1587d2cafb3a 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -19,6 +19,86 @@ SELECT 1 AS "int";
    1
 (1 row)
 
+-- LIMIT and OFFSET patterns
+-- These require more entropy with parsing node offsets.
+SELECT 1 AS "int" LIMIT 1;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 1 AS "int" LIMIT 2;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 1 AS "int" OFFSET 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 1 LIMIT 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2 LIMIT 2;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" LIMIT 1 OFFSET 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" LIMIT 3 OFFSET 3;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY;
+ int 
+-----
+(0 rows)
+
+-- DISTINCT and ORDER BY patterns
+-- These require more entropy with parsing node offsets.
+SELECT DISTINCT 1 AS "int";
+ int 
+-----
+   1
+(1 row)
+
+SELECT DISTINCT 2 AS "int";
+ int 
+-----
+   2
+(1 row)
+
+SELECT 1 AS "int" ORDER BY 1;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 2 AS "int" ORDER BY 1;
+ int 
+-----
+   2
+(1 row)
+
 /* this comment should not appear in the output */
 SELECT 'hello'
   -- but this one will appear
@@ -135,9 +215,14 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
      3 |    3 | SELECT $1 + $2 + $3 AS "add"
      1 |    1 | SELECT $1 AS "float"
      2 |    2 | SELECT $1 AS "int"
+     2 |    2 | SELECT $1 AS "int" LIMIT $2
+     2 |    0 | SELECT $1 AS "int" OFFSET $2
+     6 |    0 | SELECT $1 AS "int" OFFSET $2 LIMIT $3
+     2 |    2 | SELECT $1 AS "int" ORDER BY 1
      1 |    2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i
      1 |    1 | SELECT $1 || $2
      1 |    1 | SELECT $1, $2 LIMIT $3
+     2 |    2 | SELECT DISTINCT $1 AS "int"
      0 |    0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"
      1 |    1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
      1 |    2 | WITH t(f) AS (                                                              +
@@ -145,7 +230,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
        |      | )                                                                           +
        |      |   SELECT f FROM t ORDER BY f
      1 |    1 | select $1::jsonb ? $2
-(12 rows)
+(17 rows)
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e24b..4dcfa8ef74dc 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -12,6 +12,26 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 --
 SELECT 1 AS "int";
 
+-- LIMIT and OFFSET patterns
+-- These require more entropy with parsing node offsets.
+SELECT 1 AS "int" LIMIT 1;
+SELECT 1 AS "int" LIMIT 2;
+SELECT 1 AS "int" OFFSET 1;
+SELECT 1 AS "int" OFFSET 2;
+SELECT 1 AS "int" OFFSET 1 LIMIT 1;
+SELECT 1 AS "int" OFFSET 2 LIMIT 2;
+SELECT 1 AS "int" LIMIT 1 OFFSET 1;
+SELECT 1 AS "int" LIMIT 3 OFFSET 3;
+SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY;
+SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY;
+
+-- DISTINCT and ORDER BY patterns
+-- These require more entropy with parsing node offsets.
+SELECT DISTINCT 1 AS "int";
+SELECT DISTINCT 2 AS "int";
+SELECT 1 AS "int" ORDER BY 1;
+SELECT 2 AS "int" ORDER BY 1;
+
 /* this comment should not appear in the output */
 SELECT 'hello'
   -- but this one will appear
-- 
2.47.2