From e119d74ddd5cd5550246213fc66cd75cae606571 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 16 Oct 2023 16:52:27 +0200
Subject: [PATCH v15 4/4] Add query_id_const_merge_threshold

Extend query_id_const_merge to merge constants only if the number of
elements in the list is at least the specified value, which can be
configured using the new GUC query_id_const_merge_threshold.
---
 .../pg_stat_statements/expected/merging.out   | 64 +++++++++++++++++++
 contrib/pg_stat_statements/sql/merging.sql    | 17 +++++
 doc/src/sgml/config.sgml                      | 15 +++++
 doc/src/sgml/pgstatstatements.sgml            |  6 ++
 src/backend/nodes/queryjumblefuncs.c          | 12 +++-
 src/backend/utils/misc/guc_tables.c           | 11 ++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/nodes/queryjumble.h               |  1 +
 8 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1bb75a93045..81811b2db0d 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -218,4 +218,68 @@ FROM cte;
 --------
 (0 rows)
 
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                   query                                   | calls 
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])                |     2
+ SELECT pg_stat_statements_reset()                                         |     1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) |     1
+ SELECT pg_stat_statements_reset()                     |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
 RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 75960159a1d..1dc0fef9984 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -68,4 +68,21 @@ WITH cte AS (
 SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
 FROM cte;
 
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4fd997284c9..c31d4806c1c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8219,6 +8219,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-query-id-const-merge-threshold" xreflabel="query_id_const_merge_threshold">
+      <term><varname>query_id_const_merge_threshold</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>query_id_const_merge_threshold</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If the <xref linkend="guc-query-id-const-merge"/> parameter is enabled,
+        specifies the minimum number of elements an array must have for
+        constants merging to be applied. The default is 1.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 1f6e6a0e76b..eb00c1ecd2a 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -566,6 +566,12 @@ query | SELECT pg_stat_statements_reset()
 calls | 1
 </screen>
 
+   Such constants merging can be configured to apply only starting from a
+   certain number of constants in the array. The threshold can be specified
+   using <xref linkend="guc-query-id-const-merge-threshold"/>.
+  </para>
+
+  <para>
    But there is a small chance of hash collisions causing unrelated queries to
    be merged into one entry. (This cannot happen for queries belonging to
    different users or databases, however.)
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b198fe2579e..5d3acf1115c 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -47,6 +47,9 @@ int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 /* Whether to merge constants in a list when computing query_id */
 bool		query_id_const_merge = false;
 
+/* Minimum list length at which constants are merged when computing query_id */
+int			query_id_const_merge_threshold = 1;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
@@ -227,7 +230,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
 
 /*
  * Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains at least
+ * query_id_const_merge_threshold elements.
  *
  * Return value is the order of magnitude (i.e. how many digits it has) for
  * length of the list (to use for representation purposes later on) if merging
@@ -251,6 +255,12 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 		return 0;
 	}
 
+	if (list_length(elements) < query_id_const_merge_threshold)
+	{
+		/* Shorter than the configured threshold, do not merge */
+		return 0;
+	}
+
 	firstExpr = linitial(elements);
 
 	/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index fbb50adb9f9..ee239ae7302 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3518,6 +3518,17 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"query_id_const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Sets lower threshold for an array length to apply"
+						 " constants merging when computing query identifier."),
+			gettext_noop("Not used if query_id_const_merge is disabled."),
+		},
+		&query_id_const_merge_threshold,
+		1, 1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 3397780dc72..6cc30e116c4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -629,6 +629,7 @@
 #log_planner_stats = off
 #log_executor_stats = off
 #query_id_const_merge = off
+#query_id_const_merge_threshold = 1
 
 #------------------------------------------------------------------------------
 # AUTOVACUUM
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index cfa99efc14e..8f823c3c491 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -72,6 +72,7 @@ enum ComputeQueryIdType
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
 extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
-- 
2.41.0

