From c78150fea9a49aa2ab06f4b03b1e5beea4457366 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 17 Mar 2025 09:38:22 +0900
Subject: [PATCH v4] Add more entropy to query jumbling

A counter tracking the number of nodes jumbled is added to JumbleState,
and its value is mixed into the jumble for every node visited.  This also
covers NULL nodes, which were ignored by the computation until now.
---
 src/include/nodes/queryjumble.h               |  7 ++
 src/backend/nodes/queryjumblefuncs.c          | 10 +++
 .../pg_stat_statements/expected/select.out    | 87 ++++++++++++++++++-
 contrib/pg_stat_statements/sql/select.sql     | 20 +++++
 4 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb95665872..aa1126f50162 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -37,6 +37,13 @@ typedef struct JumbleState
 	/* Number of bytes used in jumble[] */
 	Size		jumble_len;
 
+	/*
+	 * Number of Nodes included in the computation.  This counter is
+	 * incremented each time a node is added to the jumbling computation,
+	 * and is added to the jumbling to increase its entropy.
+	 */
+	int			node_count;
+
 	/* Array of locations of constants that should be removed */
 	LocationLen *clocations;
 
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a2819366..ec74b92d5c3c 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -120,6 +120,7 @@ JumbleQuery(Query *query)
 	/* Set up workspace for query jumbling */
 	jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
 	jstate->jumble_len = 0;
+	jstate->node_count = 0;
 	jstate->clocations_buf_size = 32;
 	jstate->clocations = (LocationLen *)
 		palloc(jstate->clocations_buf_size * sizeof(LocationLen));
@@ -244,6 +245,15 @@ _jumbleNode(JumbleState *jstate, Node *node)
 {
 	Node	   *expr = node;
 
+	/*
+	 * Increment the node count and add it to the jumbling.  This is done
+	 * before checking whether the Node is NULL, so that even a NULL node
+	 * contributes to the computation: the counter does not depend on the
+	 * node's data, and its value is unique within a single computation.
+	 */
+	jstate->node_count++;
+	JUMBLE_FIELD_SINGLE(jstate->node_count);
+
 	if (expr == NULL)
 		return;
 
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034a6..1587d2cafb3a 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -19,6 +19,86 @@ SELECT 1 AS "int";
    1
 (1 row)
 
+-- LIMIT and OFFSET patterns
+-- These require more entropy with parsing node offsets.
+SELECT 1 AS "int" LIMIT 1;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 1 AS "int" LIMIT 2;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 1 AS "int" OFFSET 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 1 LIMIT 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2 LIMIT 2;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" LIMIT 1 OFFSET 1;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" LIMIT 3 OFFSET 3;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY;
+ int 
+-----
+(0 rows)
+
+SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY;
+ int 
+-----
+(0 rows)
+
+-- DISTINCT and ORDER BY patterns
+-- These require more entropy with parsing node offsets.
+SELECT DISTINCT 1 AS "int";
+ int 
+-----
+   1
+(1 row)
+
+SELECT DISTINCT 2 AS "int";
+ int 
+-----
+   2
+(1 row)
+
+SELECT 1 AS "int" ORDER BY 1;
+ int 
+-----
+   1
+(1 row)
+
+SELECT 2 AS "int" ORDER BY 1;
+ int 
+-----
+   2
+(1 row)
+
 /* this comment should not appear in the output */
 SELECT 'hello'
   -- but this one will appear
@@ -135,9 +215,14 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
      3 |    3 | SELECT $1 + $2 + $3 AS "add"
      1 |    1 | SELECT $1 AS "float"
      2 |    2 | SELECT $1 AS "int"
+     2 |    2 | SELECT $1 AS "int" LIMIT $2
+     2 |    0 | SELECT $1 AS "int" OFFSET $2
+     6 |    0 | SELECT $1 AS "int" OFFSET $2 LIMIT $3
+     2 |    2 | SELECT $1 AS "int" ORDER BY 1
      1 |    2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i
      1 |    1 | SELECT $1 || $2
      1 |    1 | SELECT $1, $2 LIMIT $3
+     2 |    2 | SELECT DISTINCT $1 AS "int"
      0 |    0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"
      1 |    1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
      1 |    2 | WITH t(f) AS (                                                              +
@@ -145,7 +230,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
        |      | )                                                                           +
        |      |   SELECT f FROM t ORDER BY f
      1 |    1 | select $1::jsonb ? $2
-(12 rows)
+(17 rows)
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e24b..4dcfa8ef74dc 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -12,6 +12,26 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 --
 SELECT 1 AS "int";
 
+-- LIMIT and OFFSET patterns
+-- These require more entropy with parsing node offsets.
+SELECT 1 AS "int" LIMIT 1;
+SELECT 1 AS "int" LIMIT 2;
+SELECT 1 AS "int" OFFSET 1;
+SELECT 1 AS "int" OFFSET 2;
+SELECT 1 AS "int" OFFSET 1 LIMIT 1;
+SELECT 1 AS "int" OFFSET 2 LIMIT 2;
+SELECT 1 AS "int" LIMIT 1 OFFSET 1;
+SELECT 1 AS "int" LIMIT 3 OFFSET 3;
+SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY;
+SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY;
+
+-- DISTINCT and ORDER BY patterns
+-- These require more entropy with parsing node offsets.
+SELECT DISTINCT 1 AS "int";
+SELECT DISTINCT 2 AS "int";
+SELECT 1 AS "int" ORDER BY 1;
+SELECT 2 AS "int" ORDER BY 1;
+
 /* this comment should not appear in the output */
 SELECT 'hello'
   -- but this one will appear
-- 
2.47.2

