pg_stat_statements and "IN" conditions
Hi
I would like to start another thread to follow up on [1], mostly to bump the
topic. As a reminder, it's about how pg_stat_statements jumbles ArrayExpr in
queries like:
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
The current implementation produces a different jumble hash for each distinct
number of arguments in what is essentially the same query. Unfortunately, a lot
of ORMs like to generate these types of queries, which in turn leads to
pg_stat_statements pollution. Ideally we want to prevent this and have only one
record for such a query.
As a result of [1], I've identified two approaches, highlighted in that
discussion, to improve this situation:
* Reduce the generated ArrayExpr to an array Const immediately, in cases where
all the inputs are Consts.
* Make repeated Consts contribute nothing to the resulting hash.
I've tried to prototype both approaches to find out their pros and cons and to
be more specific. The attached patches cannot be considered a finished piece of
work, but they seem to work, mostly pass the tests, and demonstrate the point. I
would like to get some high-level input about them and, ideally, make it clear
which is the preferred solution to continue with.
# Reducing ArrayExpr to an array Const
IIUC this requires producing a Const with ArrayType constvalue in
transformAExprIn for ScalarArrayOpExpr. This could be a general improvement,
since apparently it's being done later anyway. But it deals only with Const,
which leaves more on the table, e.g. Params and other similar types of
duplication we observe when repeating constants are wrapped into VALUES.
Another point here is that this approach will quite possibly still require
corresponding changes in pg_stat_statements, since merely preventing duplicates
from showing up also loses information. Ideally we also need some
understanding of how many elements are actually there, and to display it, e.g.
for cases when there is just one outlier query that contains a huge IN list.
# Contribute nothing to the hash
I guess there could be multiple ways of doing this, but the first idea I had in
mind is to skip jumbling when necessary. At the same time, it can be implemented
in a more centralized way for different types of queries (although in the
attached patch there are only Const & Values). In the simplest case we just
identify a sequence of constants of the same type, which ignores any other
cases where things are mixed. But I believe that is something that could be
considered a rare corner case, and it's better to start with the simplest
solution.
Having said that, I believe the second approach of contributing nothing to the
hash sounds more appealing, but I would love to hear other opinions.
[1]: /messages/by-id/CAF42k=JCfHMJtkAVXCzBn2XBxDC83xb4VhV7jU7enPnZ0CfEQQ@mail.gmail.com
Attachments:
0001-Reduce-ArrayExpr-into-const-array.patchapplication/octet-stream; name=0001-Reduce-ArrayExpr-into-const-array.patchDownload
From 27973d144f592249e2b8d9a548be3d563f58b737 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 10 Aug 2020 18:27:28 +0200
Subject: [PATCH] Reduce ArrayExpr into const array
If the ArrayExpr generated for a ScalarArrayOpExpr contains only
consts, reduce it to a const array. One of the advantages of that is
pg_stat_statements will consider this kind of array as a single entity
no matter how many elements are there, which will partially solve the
duplication problem (when the same query is mentioned in pg_stat_statements
multiple times due to a different number of elements in e.g. an IN condition).
---
src/backend/parser/parse_expr.c | 65 ++++++++++++++++++++++++++++++---
1 file changed, 59 insertions(+), 6 deletions(-)
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index f69976cc8c..6aea674810 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -32,6 +32,7 @@
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
+#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/lsyscache.h"
@@ -1301,6 +1302,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
*/
List *aexprs;
ArrayExpr *newa;
+ bool all_const = true;
aexprs = NIL;
foreach(l, rnonvars)
@@ -1310,6 +1312,10 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
rexpr = coerce_to_common_type(pstate, rexpr,
scalar_type,
"IN");
+
+ if (!IsA(rexpr, Const))
+ all_const = false;
+
aexprs = lappend(aexprs, rexpr);
}
newa = makeNode(ArrayExpr);
@@ -1320,12 +1326,59 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
newa->multidims = false;
newa->location = -1;
- result = (Node *) make_scalar_array_op(pstate,
- a->name,
- useOr,
- lexpr,
- (Node *) newa,
- a->location);
+ /* if all elements are const reduce ArrayExpr to a const as well */
+ if (all_const)
+ {
+ ArrayType *const_array;
+ Datum *elems = (Datum *) palloc(sizeof(Datum) * aexprs->length);
+ bool *nulls = (bool *) palloc(sizeof(bool) * aexprs->length);
+
+ int dims[1];
+ int lbs[1];
+
+ bool elembyval;
+ int16 elemlen;
+ char elemalign;
+ int i = 0;
+
+ foreach(l, aexprs)
+ {
+ Const *expr = (Const *) lfirst(l);
+
+ elems[i] = expr->constvalue;
+ nulls[i] = expr->constisnull;
+ i++;
+ }
+
+ /* ArrayExpr would have only one dimention */
+ dims[0] = aexprs->length;
+ lbs[0] = 1;
+
+ get_typlenbyvalalign(scalar_type, &elemlen,
+ &elembyval, &elemalign);
+ const_array = construct_md_array(elems, nulls, 1, dims, lbs,
+ scalar_type, elemlen,
+ elembyval, elemalign);
+
+ result = (Node *) makeConst(array_type, -1,
+ newa->array_collid, -1,
+ PointerGetDatum(const_array),
+ false, false);
+
+ result = (Node *) make_scalar_array_op(pstate,
+ a->name,
+ useOr,
+ lexpr,
+ (Node *) result,
+ a->location);
+ }
+ else
+ result = (Node *) make_scalar_array_op(pstate,
+ a->name,
+ useOr,
+ lexpr,
+ (Node *) newa,
+ a->location);
/* Consider only the Vars (if any) in the loop below */
rexprs = rvars;
--
2.21.0
0001-Limit-jumbling-for-repeating-constants.patchapplication/octet-stream; name=0001-Limit-jumbling-for-repeating-constants.patchDownload
From fd8c3139151179615ac01a972bf30970b76cd24a Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 20 Jul 2020 17:11:14 +0200
Subject: [PATCH] Limit jumbling for repeating constants
If pg_stat_statements finds an ArrayExpr or RTE_VALUES
containing only constants, skip jumbling for those at a position higher
than a predefined threshold. This helps to avoid the problem of duplicated
statements, when the same query is repeated multiple times due to
a different number of arguments in the expression, e.g.
WHERE col IN (1, 2, 3, ...)
---
.../pg_stat_statements/pg_stat_statements.c | 110 +++++++++++++++++-
1 file changed, 108 insertions(+), 2 deletions(-)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 14cad19afb..c96bcade81 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -115,6 +115,8 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
+#define MERGE_THRESHOLD 5 /* after which number of similar nodes
+ start to merge then if asked */
/*
* Extension version number, for supporting older extension versions' objects
*/
@@ -374,6 +376,7 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query);
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
static void JumbleExpr(pgssJumbleState *jstate, Node *node);
+static bool JumbleExprList(pgssJumbleState *jstate, Node *node);
static void RecordConstLocation(pgssJumbleState *jstate, int location);
static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
int query_loc, int *query_len_p, int encoding);
@@ -2647,7 +2650,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -2691,6 +2694,109 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
}
}
+static bool
+JumbleExprList(pgssJumbleState *jstate, Node *node)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool merged = false;
+ bool allConst = true;
+ int currentExprIdx;
+
+ if (node == NULL)
+ return merged;
+
+ Assert(IsA(node, List));
+ firstExpr = (Node *) lfirst(list_head((List *) node));
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(node->type);
+
+ /*
+ * If the first expression is a constant or a list of constants, try to
+ * merge the following if they're constants as well. Otherwise do
+ * JumbleExpr as usual.
+ */
+ switch (nodeTag(firstExpr))
+ {
+ case T_List:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) firstExpr)
+ {
+ Node * subExpr = (Node *) lfirst(temp);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+ ListCell *lc;
+
+ foreach(lc, (List *) expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (!equal(expr, firstExpr) && allConst &&
+ currentExprIdx > MERGE_THRESHOLD)
+ {
+ merged = true;
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+ }
+ break;
+
+ case T_Const:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!equal(expr, firstExpr) && IsA(expr, Const) &&
+ currentExprIdx > MERGE_THRESHOLD)
+ {
+ merged = true;
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+ }
+ break;
+
+ default:
+ foreach(temp, (List *) node)
+ {
+ JumbleExpr(jstate, (Node *) lfirst(temp));
+ }
+ break;
+ }
+
+ return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -2928,7 +3034,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
--
2.21.0
On Wed, Aug 12, 2020 at 06:19:02PM +0200, Dmitry Dolgov wrote:
I would like to start another thread to follow up on [1], mostly to bump up the
topic. Just to remind, it's about how pg_stat_statements jumbling ArrayExpr in
queries like:
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
The current implementation produces different jumble hash for every different
number of arguments for essentially the same query. Unfortunately a lot of ORMs
like to generate these types of queries, which in turn leads to
pg_stat_statements pollution. Ideally we want to prevent this and have only one
record for such a query.
As the result of [1] I've identified two highlighted approaches to improve this
situation:
* Reduce the generated ArrayExpr to an array Const immediately, in cases where
all the inputs are Consts.
* Make repeating Const to contribute nothing to the resulting hash.
I've tried to prototype both approaches to find out pros/cons and be more
specific. Attached patches could not be considered a completed piece of work,
but they seem to work, mostly pass the tests and demonstrate the point. I would
like to get some high level input about them and ideally make it clear what is
the preferred solution to continue with.
I've implemented the second approach mentioned above; this version was
tested on our test clusters for some time without visible issues. I will
create a CF item and would appreciate any feedback.
Attachments:
v1-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From ece03928184d88add6629f5eba3ccc9e4fa5e7b8 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 17 Nov 2020 16:18:08 +0100
Subject: [PATCH v1] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of ArrayExpr is
jumbled. Make Consts contribute nothing to the jumble hash if they're
part of a series and at a position further than the specified threshold. Do
the same for similar queries with VALUES as well.
---
.../expected/pg_stat_statements.out | 614 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 208 +++++-
.../sql/pg_stat_statements.sql | 115 ++++
3 files changed, 925 insertions(+), 12 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 2a303a7f07..9d0fe074ae 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -861,4 +861,616 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 | 0 | 0
(6 rows)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id
+----
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id
+----
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index dd963c4644..389a2e8e18 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -231,6 +231,8 @@ typedef struct pgssLocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} pgssLocationLen;
/*
@@ -299,6 +301,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minumum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -374,7 +377,9 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query);
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
+static bool JumbleExprList(pgssJumbleState *jstate, Node *node);
+static void RecordConstLocation(pgssJumbleState *jstate, int location,
+ bool merged);
static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
int query_loc, int *query_len_p);
static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
@@ -460,6 +465,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicating constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -863,7 +881,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
jstate.jumble_len = 0;
jstate.clocations_buf_size = 32;
jstate.clocations = (pgssLocationLen *)
- palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
+ palloc0(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0;
jstate.highest_extern_param_id = 0;
@@ -2655,7 +2673,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -2699,6 +2717,155 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
}
}
+static bool
+JumbleExprList(pgssJumbleState *jstate, Node *node)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool merged = false;
+ bool allConst = true;
+ int currentExprIdx;
+
+ if (node == NULL)
+ return merged;
+
+ Assert(IsA(node, List));
+ firstExpr = (Node *) lfirst(list_head((List *) node));
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(node->type);
+
+ /*
+ * If the first expression is a constant or a list of constants, try to
+ * merge the following if they're constants as well. Otherwise do
+ * JumbleExpr as usual.
+ */
+ switch (nodeTag(firstExpr))
+ {
+ case T_List:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) firstExpr)
+ {
+ Node * subExpr = (Node *) lfirst(temp);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+ ListCell *lc;
+
+ foreach(lc, (List *) expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (!equal(expr, firstExpr) && allConst &&
+ currentExprIdx >= pgss_merge_threshold - 1)
+ {
+ Const *c = (Const *) linitial((List *) expr);
+
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
+ else
+ RecordConstLocation(jstate, c->location, true);
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+ }
+ break;
+
+ case T_Const:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!equal(expr, firstExpr) && IsA(expr, Const) &&
+ currentExprIdx >= pgss_merge_threshold - 1)
+ {
+ Const *c = (Const *) expr;
+
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
+ else
+ RecordConstLocation(jstate, c->location, true);
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+ }
+ break;
+
+ default:
+ foreach(temp, (List *) node)
+ {
+ JumbleExpr(jstate, (Node *) lfirst(temp));
+ }
+ break;
+ }
+
+ return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -2748,7 +2915,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -2936,7 +3103,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -3193,7 +3360,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
* that is currently being walked.
*/
static void
-RecordConstLocation(pgssJumbleState *jstate, int location)
+RecordConstLocation(pgssJumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -3210,6 +3377,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify fill_in_constant_lengths */
jstate->clocations[jstate->clocations_count].length = -1;
+ jstate->clocations[jstate->clocations_count].merged = merged;
+
jstate->clocations_count++;
}
}
@@ -3246,6 +3415,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -3284,12 +3454,28 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* If merging, do not copy anything in between constants */
+ if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+ }
+
+ if (!jstate->clocations[i].merged)
+ {
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index e9f5bb84e3..c7d589d867 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -358,4 +358,119 @@ SELECT 42;
SELECT 42;
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
DROP EXTENSION pg_stat_statements;
--
2.21.0
The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: not tested
Hi, I did some testing and it works well.
On Wed, Dec 09, 2020 at 03:37:40AM +0000, Chengxi Sun wrote:
The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: not tested
Hi, I did some testing and it works well.
Thanks for testing!
On Wed, Nov 18, 2020 at 05:04:32PM +0100, Dmitry Dolgov wrote:
On Wed, Aug 12, 2020 at 06:19:02PM +0200, Dmitry Dolgov wrote:
I would like to start another thread to follow up on [1], mostly to bump up the
topic. Just to remind, it's about how pg_stat_statements jumbling ArrayExpr in
queries like:

SELECT something FROM table WHERE col IN (1, 2, 3, ...)

The current implementation produces different jumble hash for every different
number of arguments for essentially the same query. Unfortunately a lot of ORMs
like to generate these types of queries, which in turn leads to
pg_stat_statements pollution. Ideally we want to prevent this and have only one
record for such a query.

As the result of [1] I've identified two highlighted approaches to improve this
situation:

* Reduce the generated ArrayExpr to an array Const immediately, in cases where
all the inputs are Consts.

* Make repeating Const to contribute nothing to the resulting hash.

I've tried to prototype both approaches to find out pros/cons and be more
specific. Attached patches could not be considered a completed piece of work,
but they seem to work, mostly pass the tests and demonstrate the point. I would
like to get some high level input about them and ideally make it clear what is
the preferred solution to continue with.

I've implemented the second approach mentioned above, this version was
tested on our test clusters for some time without visible issues. Will
create a CF item and would appreciate any feedback.
After more testing I found a couple of things that could be improved,
namely in the presence of non-reducible constants one part of the query
was not copied into the normalized version, and this approach also could
be extended for Params. These are incorporated in the attached patch.
Attachments:
v2-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) [Download]
From a93824799eda63391989e8845393f0b773508e18 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 17 Nov 2020 16:18:08 +0100
Subject: [PATCH v2] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts contribute nothing to the jumble hash if they're
part of a series and at a position further than the specified threshold. Do
the same for similar queries with VALUES as well.
---
.../expected/pg_stat_statements.out | 657 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 262 ++++++-
.../sql/pg_stat_statements.sql | 129 ++++
3 files changed, 1034 insertions(+), 14 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 2a303a7f07..6978e37ca7 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -861,4 +861,659 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 | 0 | 0
(6 rows)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index dd963c4644..0ca1437055 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -231,6 +231,8 @@ typedef struct pgssLocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} pgssLocationLen;
/*
@@ -245,7 +247,10 @@ typedef struct pgssJumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
pgssLocationLen *clocations;
/* Allocated length of clocations array */
@@ -299,6 +304,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minimum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -374,7 +380,9 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query);
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
+static bool JumbleExprList(pgssJumbleState *jstate, Node *node);
+static void RecordConstLocation(pgssJumbleState *jstate, int location,
+ bool merged);
static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
int query_loc, int *query_len_p);
static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
@@ -460,6 +468,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicating constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -863,7 +884,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
jstate.jumble_len = 0;
jstate.clocations_buf_size = 32;
jstate.clocations = (pgssLocationLen *)
- palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
+ palloc0(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0;
jstate.highest_extern_param_id = 0;
@@ -2655,7 +2676,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -2699,6 +2720,203 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Mark as merged every constant location recorded at index 'from' or later.
+ */
+static void
+MarkLocationsMerged(pgssJumbleState *jstate, int from)
+{
+    int         i;
+
+    for (i = from; i < jstate->clocations_count; i++)
+        jstate->clocations[i].merged = true;
+}
+
+/*
+ * Jumble a list of expressions, merging long runs of constants.
+ *
+ * 'node' is NULL or a List.  What happens depends on the node type of the
+ * first list element:
+ *
+ *  - List:  each element is treated as a row of expressions; rows made up
+ *           only of Consts are candidates for merging.
+ *  - Const: Const elements are candidates for merging.
+ *  - Param: Param elements are candidates for merging.
+ *  - anything else: every element is passed to JumbleExpr unchanged.
+ *
+ * The first pgss_merge_threshold - 1 elements are always jumbled normally.
+ * The first candidate found after that is still jumbled, but its recorded
+ * location is flagged as merged.  Every later candidate contributes nothing
+ * to the jumble; only its location is recorded, flagged as merged.
+ * Non-candidates are always jumbled normally.
+ *
+ * Returns true if at least one element was merged, false otherwise
+ * (including when 'node' is NULL).
+ */
+static bool
+JumbleExprList(pgssJumbleState *jstate, Node *node)
+{
+    ListCell   *temp;
+    Node       *firstExpr = NULL;
+    bool        merged = false;
+    bool        allConst = true;
+    int         currentExprIdx = 0;
+
+    if (node == NULL)
+        return merged;
+
+    Assert(IsA(node, List));
+    firstExpr = (Node *) lfirst(list_head((List *) node));
+
+    /* Guard against stack overflow due to overly complex expressions */
+    check_stack_depth();
+
+    /*
+     * We always emit the node's NodeTag, then any additional fields that are
+     * considered significant, and then we recurse to any child nodes.
+     */
+    APP_JUMB(node->type);
+
+    /*
+     * If the first expression is a constant or a list of constants, try to
+     * merge the following if they're constants as well. Otherwise do
+     * JumbleExpr as usual.
+     *
+     * The first element is excluded from merging by pointer identity, not by
+     * equal(): a by-value comparison would also exclude any later element
+     * that merely carries the same value, making the jumble depend on
+     * constant values.
+     */
+    switch (nodeTag(firstExpr))
+    {
+        case T_List:
+            foreach(temp, (List *) node)
+            {
+                Node       *expr = (Node *) lfirst(temp);
+                ListCell   *lc;
+
+                /*
+                 * allConst is never reset, so once a row with a non-Const
+                 * member was seen there is no point in scanning further
+                 * rows.
+                 */
+                if (allConst)
+                {
+                    foreach(lc, (List *) expr)
+                    {
+                        if (!IsA((Node *) lfirst(lc), Const))
+                        {
+                            allConst = false;
+                            break;
+                        }
+                    }
+                }
+
+                if (expr != firstExpr && allConst &&
+                    currentExprIdx >= pgss_merge_threshold - 1)
+                {
+                    merged = true;
+
+                    if (currentExprIdx == pgss_merge_threshold - 1)
+                    {
+                        int         firstLoc = jstate->clocations_count;
+
+                        /*
+                         * This hash is going to accumulate the following
+                         * merged statements.
+                         */
+                        JumbleExpr(jstate, expr);
+
+                        /*
+                         * Flag every constant of this row, not only the
+                         * last one, so that the whole row is shown as
+                         * merged in the normalized query.
+                         */
+                        MarkLocationsMerged(jstate, firstLoc);
+                        currentExprIdx++;
+                    }
+                    else
+                    {
+                        /*
+                         * Record all constants of the row, otherwise the
+                         * trailing ones would be left in the query text.
+                         * All members are Consts, since allConst holds.
+                         */
+                        foreach(lc, (List *) expr)
+                        {
+                            Const      *c = (Const *) lfirst(lc);
+
+                            RecordConstLocation(jstate, c->location, true);
+                        }
+                    }
+
+                    continue;
+                }
+
+                JumbleExpr(jstate, expr);
+                currentExprIdx++;
+            }
+            break;
+
+        case T_Const:
+            foreach(temp, (List *) node)
+            {
+                Node       *expr = (Node *) lfirst(temp);
+
+                if (expr != firstExpr && IsA(expr, Const) &&
+                    currentExprIdx >= pgss_merge_threshold - 1)
+                {
+                    merged = true;
+
+                    if (currentExprIdx == pgss_merge_threshold - 1)
+                    {
+                        int         firstLoc = jstate->clocations_count;
+
+                        /*
+                         * This hash is going to accumulate the following
+                         * merged statements.
+                         */
+                        JumbleExpr(jstate, expr);
+
+                        /*
+                         * Flag only what this call recorded.  If the
+                         * constant has no known location nothing was
+                         * recorded, and an unrelated earlier entry must not
+                         * be flagged instead.
+                         */
+                        MarkLocationsMerged(jstate, firstLoc);
+                        currentExprIdx++;
+                    }
+                    else
+                        RecordConstLocation(jstate,
+                                            ((Const *) expr)->location, true);
+
+                    continue;
+                }
+
+                JumbleExpr(jstate, expr);
+                currentExprIdx++;
+            }
+            break;
+
+        case T_Param:
+            foreach(temp, (List *) node)
+            {
+                Node       *expr = (Node *) lfirst(temp);
+
+                if (expr != firstExpr && IsA(expr, Param) &&
+                    currentExprIdx >= pgss_merge_threshold - 1)
+                {
+                    merged = true;
+
+                    if (currentExprIdx == pgss_merge_threshold - 1)
+                    {
+                        /*
+                         * This hash is going to accumulate the following
+                         * merged statements.
+                         */
+                        JumbleExpr(jstate, expr);
+
+                        /*
+                         * Flag the most recently recorded location as
+                         * merged.
+                         *
+                         * NOTE(review): JumbleExpr's T_Param handling is not
+                         * visible here.  If it does not record a location,
+                         * this flags the previous element's entry rather
+                         * than this one -- confirm.
+                         */
+                        Assert(jstate->clocations_count > 0);
+                        jstate->clocations[jstate->clocations_count - 1].merged = true;
+                        currentExprIdx++;
+                    }
+                    else
+                        RecordConstLocation(jstate,
+                                            ((Param *) expr)->location, true);
+
+                    continue;
+                }
+
+                JumbleExpr(jstate, expr);
+
+                /*
+                 * To allow merging of parameters as well in
+                 * generate_normalized_query, remember it as a constant.
+                 * Only a real Param has a Param location field; the list may
+                 * also hold other node types after a leading Param.
+                 */
+                if (IsA(expr, Param))
+                    RecordConstLocation(jstate,
+                                        ((Param *) expr)->location, false);
+                currentExprIdx++;
+            }
+            break;
+
+        default:
+            foreach(temp, (List *) node)
+            {
+                JumbleExpr(jstate, (Node *) lfirst(temp));
+            }
+            break;
+    }
+
+    return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -2748,7 +2966,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -2936,7 +3154,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -3189,11 +3407,11 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(pgssJumbleState *jstate, int location)
+RecordConstLocation(pgssJumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -3210,6 +3428,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify fill_in_constant_lengths */
jstate->clocations[jstate->clocations_count].length = -1;
+ jstate->clocations[jstate->clocations_count].merged = merged;
+
jstate->clocations_count++;
}
}
@@ -3246,6 +3466,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -3284,12 +3505,27 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index e9f5bb84e3..9c95dc2a9e 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -358,4 +358,133 @@ SELECT 42;
SELECT 42;
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
DROP EXTENSION pg_stat_statements;
--
2.21.0
Hi,
A few comments.
+ "After this number of duplicating constants
start to merge them.",
duplicating -> duplicate
+ foreach(lc, (List *) expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
It seems the above foreach loop (within foreach(temp, (List *) node)) can
be preceded with a check that allConst is true. Otherwise the loop can be
skipped.
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the
first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count -
1].merged = true;
+ currentExprIdx++;
+ }
The above snippet occurs a few times. Maybe extract into a helper method.
Cheers
On Sat, Dec 26, 2020 at 2:45 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
On Wed, Nov 18, 2020 at 05:04:32PM +0100, Dmitry Dolgov wrote:
On Wed, Aug 12, 2020 at 06:19:02PM +0200, Dmitry Dolgov wrote:
I would like to start another thread to follow up on [1], mostly to
bump up the
topic. Just to remind, it's about how pg_stat_statements jumbling
ArrayExpr in
queries like:
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
The current implementation produces different jumble hash for every
different
number of arguments for essentially the same query. Unfortunately a
lot of ORMs
like to generate these types of queries, which in turn leads to
pg_stat_statements pollution. Ideally we want to prevent this and have only one
record for such a query.
As the result of [1] I've identified two highlighted approaches to
improve this
situation:
* Reduce the generated ArrayExpr to an array Const immediately, in
cases where
all the inputs are Consts.
* Make repeating Const to contribute nothing to the resulting hash.
I've tried to prototype both approaches to find out pros/cons and be
more
specific. Attached patches could not be considered a completed piece
of work,
but they seem to work, mostly pass the tests and demonstrate the
point. I would
like to get some high level input about them and ideally make it clear
what is
the preferred solution to continue with.
I've implemented the second approach mentioned above, this version was
tested on our test clusters for some time without visible issues. Will
create a CF item and would appreciate any feedback.
After more testing I found a couple of things that could be improved,
namely in the presence of non-reducible constants one part of the query
was not copied into the normalized version, and this approach also could
be extended for Params. These are incorporated in the attached patch.
On Sat, Dec 26, 2020 at 08:53:28AM -0800, Zhihong Yu wrote:
Hi,
A few comments.
+ foreach(lc, (List *) expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
It seems the above foreach loop (within foreach(temp, (List *) node)) can
be preceded with a check that allConst is true. Otherwise the loop can be
skipped.
Thanks for noticing. Now that I look at it closer I think it's the other
way around, the loop above checking constants for the first expression
is not really necessary.
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
The above snippet occurs a few times. Maybe extract into a helper method.
Originally I was hesitant to extract it because it's quite a small
part of the code. But now I've realized that the part relevant to lists
is not really correct, which makes those bits even more different, so I
think it makes sense to leave it like that. What do you think?
Attachments:
v3-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 35f3355e56462773263d31bebaf60fee6a71dca5 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 17 Nov 2020 16:18:08 +0100
Subject: [PATCH v3] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts contribute nothing to the jumble hash if they're
part of a series and at a position further than the specified threshold. Do
the same for similar queries with VALUES as well.
---
.../expected/pg_stat_statements.out | 750 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 262 +++++-
.../sql/pg_stat_statements.sql | 139 ++++
3 files changed, 1137 insertions(+), 14 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 2a303a7f07..4b5ed40bb2 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -861,4 +861,752 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 | 0 | 0
(6 rows)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(3 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+(4 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+(6 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+(7 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+(8 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index dd963c4644..103ecb7c61 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -231,6 +231,8 @@ typedef struct pgssLocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} pgssLocationLen;
/*
@@ -245,7 +247,10 @@ typedef struct pgssJumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
pgssLocationLen *clocations;
/* Allocated length of clocations array */
@@ -299,6 +304,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minumum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -374,7 +380,9 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query);
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
+static bool JumbleExprList(pgssJumbleState *jstate, Node *node);
+static void RecordConstLocation(pgssJumbleState *jstate, int location,
+ bool merged);
static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
int query_loc, int *query_len_p);
static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
@@ -460,6 +468,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicate constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -863,7 +884,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
jstate.jumble_len = 0;
jstate.clocations_buf_size = 32;
jstate.clocations = (pgssLocationLen *)
- palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
+ palloc0(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0;
jstate.highest_extern_param_id = 0;
@@ -2655,7 +2676,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -2699,6 +2720,203 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
}
}
+static bool
+JumbleExprList(pgssJumbleState *jstate, Node *node)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool merged = false;
+ bool allConst = true;
+ int currentExprIdx;
+ int lastExprLenght = 0;
+
+ if (node == NULL)
+ return merged;
+
+ Assert(IsA(node, List));
+ firstExpr = (Node *) lfirst(list_head((List *) node));
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(node->type);
+
+ /*
+ * If the first expression is a constant or a list of constants, try to
+ * merge the following if they're constants as well. Otherwise do
+ * JumbleExpr as usual.
+ */
+ switch (nodeTag(firstExpr))
+ {
+ case T_List:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ List *expr = (List *) lfirst(temp);
+ ListCell *lc;
+
+ foreach(lc, expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (!equal(expr, firstExpr) && allConst &&
+ currentExprIdx >= pgss_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, (Node *) expr);
+
+ /*
+ * An expr consisting of constants is already found,
+ * JumbleExpr must record it. Mark all the constants as
+ * merged, they will be the first merged but still
+ * present in the statement query.
+ */
+ Assert(jstate->clocations_count > lastExprLenght - 1);
+ for (int i = 1; i < lastExprLenght + 1; i++)
+ {
+ pgssLocationLen *loc;
+ loc = &jstate->clocations[jstate->clocations_count - i];
+ loc->merged = true;
+ }
+ currentExprIdx++;
+ }
+ else
+ foreach(lc, expr)
+ {
+ Const *c = (Const *) lfirst(lc);
+ RecordConstLocation(jstate, c->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, (Node *) expr);
+ currentExprIdx++;
+ lastExprLenght = expr->length;
+ }
+ break;
+
+ case T_Const:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!equal(expr, firstExpr) && IsA(expr, Const) &&
+ currentExprIdx >= pgss_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
+ else
+ {
+ Const *c = (Const *) expr;
+ RecordConstLocation(jstate, c->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+ }
+ break;
+
+ case T_Param:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+ Param *p = (Param *) expr;
+
+ if (!equal(expr, firstExpr) && IsA(expr, Param) &&
+ currentExprIdx >= pgss_merge_threshold - 1)
+ {
+
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
+ else
+ RecordConstLocation(jstate, p->location, true);
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+
+ /*
+ * To allow merging of parameters as well in
+ * generate_normalized_query, remember it as a constant.
+ */
+ RecordConstLocation(jstate, p->location, false);
+ currentExprIdx++;
+ }
+ break;
+
+ default:
+ foreach(temp, (List *) node)
+ {
+ JumbleExpr(jstate, (Node *) lfirst(temp));
+ }
+ break;
+ }
+
+ return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -2748,7 +2966,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -2936,7 +3154,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -3189,11 +3407,11 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(pgssJumbleState *jstate, int location)
+RecordConstLocation(pgssJumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -3210,6 +3428,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify fill_in_constant_lengths */
jstate->clocations[jstate->clocations_count].length = -1;
+ jstate->clocations[jstate->clocations_count].merged = merged;
+
jstate->clocations_count++;
}
}
@@ -3246,6 +3466,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -3284,12 +3505,27 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index e9f5bb84e3..ff5c9db6c0 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -358,4 +358,143 @@ SELECT 42;
SELECT 42;
SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
DROP EXTENSION pg_stat_statements;
--
2.21.0
Hi, Dmitry:
+ int lastExprLenght = 0;
Did you mean to name the variable lastExprLenghth ?
w.r.t. extracting to helper method, the second and third if (currentExprIdx
== pgss_merge_threshold - 1) blocks are similar.
It is up to you whether to create the helper method.
I am fine with the current formation.
Cheers
On Tue, Jan 5, 2021 at 4:51 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
On Sat, Dec 26, 2020 at 08:53:28AM -0800, Zhihong Yu wrote:
Hi,
A few comments.
+ foreach(lc, (List *) expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
It seems the above foreach loop (within foreach(temp, (List *) node)) can
be preceded with a check that allConst is true. Otherwise the loop can be
skipped.
Thanks for noticing. Now that I look at it closer I think it's the other
way around, the loop above checking constants for the first expression
is not really necessary.
+ if (currentExprIdx == pgss_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
The above snippet occurs a few times. Maybe extract into a helper method.
Originally I was hesitant to extract it because it's quite a small
part of the code. But now I've realized that the part relevant to lists
is not really correct, which makes those bits even more different, so I
think it makes sense to leave it like that. What do you think?
On 1/5/21 10:51 AM, Zhihong Yu wrote:
+ int lastExprLenght = 0;
Did you mean to name the variable lastExprLenghth ?
w.r.t. extracting to helper method, the second and third
if (currentExprIdx == pgss_merge_threshold - 1) blocks are similar.
It is up to you whether to create the helper method.
I am fine with the current formation.
Dmitry, thoughts on this review?
Regards,
--
-David
david@pgmasters.net
On Thu, Mar 18, 2021 at 09:38:09AM -0400, David Steele wrote:
On 1/5/21 10:51 AM, Zhihong Yu wrote:
+ int lastExprLenght = 0;
Did you mean to name the variable lastExprLenghth ?
w.r.t. extracting to helper method, the second and third
if (currentExprIdx == pgss_merge_threshold - 1) blocks are similar.
It is up to you whether to create the helper method.
I am fine with the current formation.
Dmitry, thoughts on this review?
Oh, right. lastExprLenghth is obviously a typo, and as we agreed that
the helper is not strictly necessary I wanted to wait a bit hoping for
more feedback and eventually to post an accumulated patch. Doesn't make
sense to post another version only to fix one typo :)
On Thu, Mar 18, 2021 at 04:50:02PM +0100, Dmitry Dolgov wrote:
On Thu, Mar 18, 2021 at 09:38:09AM -0400, David Steele wrote:
On 1/5/21 10:51 AM, Zhihong Yu wrote:
+ int lastExprLenght = 0;
Did you mean to name the variable lastExprLenghth ?
w.r.t. extracting to helper method, the second and third
if (currentExprIdx == pgss_merge_threshold - 1) blocks are similar.
It is up to you whether to create the helper method.
I am fine with the current formation.
Dmitry, thoughts on this review?
Oh, right. lastExprLenghth is obviously a typo, and as we agreed that
the helper is not strictly necessary I wanted to wait a bit hoping for
more feedback and eventually to post an accumulated patch. Doesn't make
sense to post another version only to fix one typo :)
Hi,
I've prepared a new rebased version to deal with the new way of
computing query id, but as always there is one tricky part. From what I
understand, now an external module can provide custom implementation for
query id computation algorithm. It seems natural to think this machinery
could be used instead of patch in the thread, i.e. one could create a
custom logic that will enable constants collapsing as needed, so that
same queries with different number of constants in an array will be
hashed into the same record.
But there is a limitation in how such queries will be normalized
afterwards — to reduce level of surprise it's necessary to display the
fact that a certain query actually had more constants than are shown in
pgss record. Ideally LocationLen needs to carry some bits of information
on what exactly could be skipped, and generate_normalized_query needs to
understand that, both are not reachable for an external module with
custom query id logic (without replicating significant part of the
existing code). Hence, a new version of the patch.
Attachments:
v4-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) Download
From 81a217485385452c7223a140dd4c98eeb5270945 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Thu, 10 Jun 2021 13:15:35 +0200
Subject: [PATCH v4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts (or any expression that could be reduced to a
Const) contribute nothing to the jumble hash if they're part of a series
and at a position beyond the specified threshold. Do the same for
similar queries with VALUES as well.
---
.../expected/pg_stat_statements.out | 835 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 42 +-
.../sql/pg_stat_statements.sql | 163 ++++
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 274 +++++-
src/include/utils/queryjumble.h | 11 +-
6 files changed, 1323 insertions(+), 15 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 40b5109b55..3fc1978066 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -1067,4 +1067,837 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(3 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+(4 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+(6 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+(7 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+(8 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (($1+$2), ($3+$4), ($5+$6), ($7+$8), (...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs(...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 09433c8c96..f546ee67e4 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -54,7 +54,9 @@
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/planner.h"
+#include "optimizer/optimizer.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/scanner.h"
@@ -273,6 +275,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minimum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -436,6 +439,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicate constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -2599,6 +2615,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2637,12 +2654,27 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index bc3b6493e6..d863b2075f 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -437,4 +437,167 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
DROP EXTENSION pg_stat_statements;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee731044b6..2dd7c40749 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3539,6 +3539,19 @@ static struct config_int ConfigureNamesInt[] =
check_client_connection_check_interval, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal number of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value."),
+ },
+ &const_merge_threshold,
+ 5, 0, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index 9f2cd1f127..a9d09d08a0 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal number of constants in a list after which the rest are merged */
+int const_merge_threshold = 5;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static bool JumbleExprList(JumbleState *jstate, Node *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -297,7 +301,7 @@ JumbleRangeTable(JumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -341,6 +345,261 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * find_const_walker
+ *		Collect every Const node reachable from an expression tree.
+ *
+ * The caller passes the address of an empty list; each Const encountered is
+ * appended to it in the order the walk reaches it.  A Const is treated as a
+ * leaf, so the walk does not descend below it.  This function itself only
+ * ever returns false for NULL and Const nodes; for anything else it returns
+ * whatever expression_tree_walker() returns.
+ */
+static bool
+find_const_walker(Node *node, List **constants)
+{
+	/* Anything that is not a Const is simply descended into. */
+	if (node != NULL && !IsA(node, Const))
+		return expression_tree_walker(node, find_const_walker,
+									  (void *) constants);
+
+	/* A Const is collected; both Const and NULL end this branch. */
+	if (node != NULL)
+		*constants = lappend(*constants, (Const *) node);
+
+	return false;
+}
+
+/*
+ * JumbleAndMarkMerged
+ *		Jumble one expression and flag every constant location that this
+ *		call recorded as merged.
+ *
+ * Used for the first expression at the merge threshold: it still contributes
+ * to the hash and stays present in the statement text, but its locations are
+ * flagged so that later processing can tell where the merged part begins.
+ * The locations are counted around the JumbleExpr() call rather than guessed
+ * from the preceding expression, so an expression holding a different number
+ * of constants than its predecessor cannot cause unrelated entries to be
+ * flagged.
+ */
+static void
+JumbleAndMarkMerged(JumbleState *jstate, Node *expr)
+{
+	int			firstNew = jstate->clocations_count;
+
+	JumbleExpr(jstate, expr);
+
+	for (int i = firstNew; i < jstate->clocations_count; i++)
+		jstate->clocations[i].merged = true;
+}
+
+/*
+ * RecordMergedExprEnd
+ *		Record, as merged, the location of the last Const found inside expr.
+ *
+ * eval_const_expressions does not provide a Const with a valid location, so
+ * the constants that are really present in the expression are collected
+ * manually.  Only the last one is needed, to find out where the expression
+ * ends.  An expression may fold to a Const without containing any Const node
+ * at all; in that case there is no location to record and nothing is done.
+ */
+static void
+RecordMergedExprEnd(JumbleState *jstate, Node *expr)
+{
+	List	   *constants = NIL;
+	Const	   *lastConst;
+
+	find_const_walker(expr, &constants);
+
+	/* llast() must not be applied to an empty list */
+	if (constants == NIL)
+		return;
+
+	lastConst = (Const *) llast(constants);
+	RecordConstLocation(jstate, lastConst->location, true);
+}
+
+/*
+ * JumbleExprList
+ *		Jumble a list of expressions, merging the tail of a series of
+ *		constants, constant rows or parameters.
+ *
+ * node must be a List (or NULL, in which case nothing is done).  The kind of
+ * the first element, after constant folding, selects the strategy:
+ *
+ *	- List: every element is a row; a row qualifies for merging when all of
+ *	  its members fold to a Const.  Once one row fails this check, no later
+ *	  row is merged.
+ *	- Const: an element qualifies when it folds to a Const.
+ *	- Param: an element qualifies when it is a Param that differs from the
+ *	  first one.
+ *	- anything else: every element is passed to JumbleExpr() unchanged.
+ *
+ * Elements are jumbled normally until the running index reaches
+ * const_merge_threshold - 1.  The qualifying element at exactly that index
+ * is still jumbled, with its location(s) flagged as merged; qualifying
+ * elements after it only get a location recorded as merged and contribute
+ * nothing to the hash.
+ *
+ * NOTE(review): with const_merge_threshold = 0 the index never equals -1,
+ * so no qualifying element is jumbled at all -- confirm this is intended.
+ *
+ * Returns true if at least one element was treated as merged.
+ */
+static bool
+JumbleExprList(JumbleState *jstate, Node *node)
+{
+	ListCell   *temp;
+	Node	   *firstExpr = NULL;
+	bool		merged = false;
+	bool		allConst = true;
+	int			currentExprIdx = 0;
+
+	if (node == NULL)
+		return merged;
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	Assert(IsA(node, List));
+	firstExpr = eval_const_expressions(NULL, (Node *) lfirst(list_head((List *) node)));
+
+	/*
+	 * We always emit the node's NodeTag, then any additional fields that are
+	 * considered significant, and then we recurse to any child nodes.
+	 */
+	APP_JUMB(node->type);
+
+	/*
+	 * If the first expression is a constant or a list of constants, try to
+	 * merge the following if they're constants as well. Otherwise do
+	 * JumbleExpr as usual.
+	 */
+	switch (nodeTag(firstExpr))
+	{
+		case T_List:
+			foreach(temp, (List *) node)
+			{
+				List	   *row = (List *) lfirst(temp);
+				ListCell   *lc;
+
+				/*
+				 * allConst is sticky: after the first row with a non-constant
+				 * member there is no point in folding the remaining rows.
+				 */
+				if (allConst)
+				{
+					foreach(lc, row)
+					{
+						Node	   *subExpr = eval_const_expressions(NULL, (Node *) lfirst(lc));
+
+						if (!IsA(subExpr, Const))
+						{
+							allConst = false;
+							break;
+						}
+					}
+				}
+
+				if (allConst && currentExprIdx >= const_merge_threshold - 1)
+				{
+					merged = true;
+
+					if (currentExprIdx == const_merge_threshold - 1)
+					{
+						/*
+						 * This row's hash stands in for all the following
+						 * merged rows; it is the first merged one but still
+						 * present in the statement text.
+						 */
+						JumbleAndMarkMerged(jstate, (Node *) row);
+						currentExprIdx++;
+					}
+					else
+					{
+						foreach(lc, row)
+							RecordMergedExprEnd(jstate, (Node *) lfirst(lc));
+					}
+
+					continue;
+				}
+
+				JumbleExpr(jstate, (Node *) row);
+				currentExprIdx++;
+			}
+			break;
+
+		case T_Const:
+			foreach(temp, (List *) node)
+			{
+				Node	   *expr = (Node *) lfirst(temp);
+				Node	   *evalExpr = eval_const_expressions(NULL, expr);
+
+				if (IsA(evalExpr, Const) && currentExprIdx >= const_merge_threshold - 1)
+				{
+					merged = true;
+
+					if (currentExprIdx == const_merge_threshold - 1)
+					{
+						/*
+						 * This expression's hash stands in for all the
+						 * following merged ones; it is the first merged one
+						 * but still present in the statement text.
+						 */
+						JumbleAndMarkMerged(jstate, expr);
+						currentExprIdx++;
+					}
+					else
+					{
+						/*
+						 * Even a single expression can contain many
+						 * constants; only the last one is recorded.
+						 */
+						RecordMergedExprEnd(jstate, expr);
+					}
+
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+				currentExprIdx++;
+			}
+			break;
+
+		case T_Param:
+			foreach(temp, (List *) node)
+			{
+				Node	   *expr = (Node *) lfirst(temp);
+
+				if (!equal(expr, firstExpr) && IsA(expr, Param) &&
+					currentExprIdx >= const_merge_threshold - 1)
+				{
+					merged = true;
+
+					if (currentExprIdx == const_merge_threshold - 1)
+					{
+						JumbleExpr(jstate, expr);
+
+						/*
+						 * Flag the most recently recorded location as
+						 * merged; it marks the point in the statement text
+						 * where the merged parameters start.
+						 */
+						Assert(jstate->clocations_count > 0);
+						jstate->clocations[jstate->clocations_count - 1].merged = true;
+						currentExprIdx++;
+					}
+					else
+						RecordConstLocation(jstate, ((Param *) expr)->location, true);
+
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+
+				/*
+				 * To allow merging of parameters as well in
+				 * generate_normalized_query, remember it as a constant.  Only
+				 * the first element is known to be a Param; later ones can be
+				 * any node type, and those must not be read through a Param
+				 * pointer.
+				 */
+				if (IsA(expr, Param))
+					RecordConstLocation(jstate, ((Param *) expr)->location, false);
+				currentExprIdx++;
+			}
+			break;
+
+		default:
+			foreach(temp, (List *) node)
+			{
+				JumbleExpr(jstate, (Node *) lfirst(temp));
+			}
+			break;
+	}
+
+	return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +649,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +838,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +1091,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +1110,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 1f4d062bab..ea9f6d8ca1 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,8 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +27,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} LocationLen;
/*
@@ -39,7 +43,10 @@ typedef struct JumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
LocationLen *clocations;
/* Allocated length of clocations array */
@@ -62,7 +69,7 @@ typedef enum
/* GUC parameters */
extern int compute_query_id;
-
+extern int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.26.3
On Tue, Jun 15, 2021 at 05:18:50PM +0200, Dmitry Dolgov wrote:
On Thu, Mar 18, 2021 at 04:50:02PM +0100, Dmitry Dolgov wrote:
On Thu, Mar 18, 2021 at 09:38:09AM -0400, David Steele wrote:
On 1/5/21 10:51 AM, Zhihong Yu wrote:
+ int lastExprLenght = 0;
Did you mean to name the variable lastExprLenghth?
w.r.t. extracting to helper method, the second and third
if (currentExprIdx == pgss_merge_threshold - 1) blocks are similar.
It is up to you whether to create the helper method.
I am fine with the current formation.
Dmitry, thoughts on this review?
Oh, right. lastExprLenghth is obviously a typo, and as we agreed that
the helper is not strictly necessary I wanted to wait a bit hoping for
more feedback and eventually to post an accumulated patch. It doesn't make
sense to post another version only to fix one typo :)
Hi,
I've prepared a new rebased version to deal with the new way of
computing query id, but as always there is one tricky part. From what I
understand, an external module can now provide a custom implementation of
the query id computation algorithm. It seems natural to think this machinery
could be used instead of the patch in this thread, i.e. one could create
custom logic that enables constant collapsing as needed, so that the
same queries with different numbers of constants in an array will be
hashed into the same record.
But there is a limitation in how such queries will be normalized
afterwards — to reduce the level of surprise it's necessary to display the
fact that a certain query in fact had more constants than are shown in
the pgss record. Ideally LocationLen needs to carry some bits of information
on what exactly could be skipped, and generate_normalized_query needs to
understand that, both are not reachable for an external module with
custom query id logic (without replicating significant part of the
existing code). Hence, a new version of the patch.
Forgot to mention a couple of people who already reviewed the patch.
Attachments:
v4-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From adb36f6db4e9c923835750134a3f57543a5a911c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Thu, 10 Jun 2021 13:15:35 +0200
Subject: [PATCH v4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts (or any expression that could be reduced to a
Const) contribute nothing to the jumble hash if they're part of a series
and at a position beyond the specified threshold. Do the same for
similar queries with VALUES as well.
Reviewed-by: Zhihong Yu, Sergey Dudoladov
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 835 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 42 +-
.../sql/pg_stat_statements.sql | 163 ++++
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 274 +++++-
src/include/utils/queryjumble.h | 11 +-
6 files changed, 1323 insertions(+), 15 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 40b5109b55..3fc1978066 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -1067,4 +1067,837 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(3 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+(4 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+(6 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+(7 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+(8 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (($1+$2), ($3+$4), ($5+$6), ($7+$8), (...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs(...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 09433c8c96..f546ee67e4 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -54,7 +54,9 @@
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/planner.h"
+#include "optimizer/optimizer.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/scanner.h"
@@ -273,6 +275,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minumum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -436,6 +439,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicate constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -2599,6 +2615,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2637,12 +2654,27 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index bc3b6493e6..d863b2075f 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -437,4 +437,167 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
DROP EXTENSION pg_stat_statements;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee731044b6..2dd7c40749 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3539,6 +3539,19 @@ static struct config_int ConfigureNamesInt[] =
check_client_connection_check_interval, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value."),
+ },
+ &const_merge_threshold,
+ 5, 0, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index 9f2cd1f127..a9d09d08a0 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* */
+int const_merge_threshold = 5;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static bool JumbleExprList(JumbleState *jstate, Node *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -297,7 +301,7 @@ JumbleRangeTable(JumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -341,6 +345,261 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * find_const_walker
+ * Locate all the Const nodes in an expression tree.
+ *
+ * Caller must provide an empty list where constants will be collected.
+ */
+static bool
+find_const_walker(Node *node, List **constants)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, Const))
+ {
+ *constants = lappend(*constants, (Const *) node);
+ return false;
+ }
+
+ return expression_tree_walker(node, find_const_walker, (void *) constants);
+}
+
+static bool
+JumbleExprList(JumbleState *jstate, Node *node)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool merged = false;
+ bool allConst = true;
+ int currentExprIdx;
+ int lastExprLength = 0;
+
+ if (node == NULL)
+ return merged;
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ Assert(IsA(node, List));
+ firstExpr = eval_const_expressions(NULL, (Node *) lfirst(list_head((List *) node)));
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(node->type);
+
+ /*
+ * If the first expression is a constant or a list of constants, try to
+ * merge the following if they're constants as well. Otherwise do
+ * JumbleExpr as usual.
+ */
+ switch (nodeTag(firstExpr))
+ {
+ case T_List:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ List *expr = (List *) lfirst(temp);
+ ListCell *lc;
+
+ foreach(lc, expr)
+ {
+ Node * subExpr = eval_const_expressions(NULL, (Node *) lfirst(lc));
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst && currentExprIdx >= const_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == const_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, (Node *) expr);
+
+ /*
+ * An expr consisting of constants is already found,
+ * JumbleExpr must record it. Mark all the constants as
+ * merged, they will be the first merged but still
+ * present in the statement query.
+ */
+ Assert(jstate->clocations_count > lastExprLength - 1);
+ for (int i = 1; i < lastExprLength + 1; i++)
+ {
+ LocationLen *loc;
+ loc = &jstate->clocations[jstate->clocations_count - i];
+ loc->merged = true;
+ }
+ currentExprIdx++;
+ }
+ else
+ foreach(lc, expr)
+ {
+ /*
+ * eval_const_expressions does not provide real
+ * Const with valid const location, which we need
+ * for generate_normalized_query. Extract such real
+ * constants manually. We need only the last one,
+ * to find out where the current expression
+ * actually ends. */
+ Const *lastConst;
+ List *constants = NIL;
+ find_const_walker((Node *) lfirst(lc), &constants);
+ lastConst = (Const *) llast(constants);
+ RecordConstLocation(jstate, lastConst->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, (Node *) expr);
+ currentExprIdx++;
+ lastExprLength = expr->length;
+ }
+ break;
+
+ case T_Const:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+ Node *evalExpr = eval_const_expressions(NULL, expr);
+
+ if (IsA(evalExpr, Const) && currentExprIdx >= const_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == const_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > lastExprLength - 1);
+ for (int i = 1; i < lastExprLength + 1; i++)
+ {
+ LocationLen *loc;
+ loc = &jstate->clocations[jstate->clocations_count - i];
+ loc->merged = true;
+ }
+ currentExprIdx++;
+ }
+ else
+ {
+ /*
+ * eval_const_expressions does not provide real
+ * Const with valid const location, which we need
+ * for generate_normalized_query. Extract such real
+ * constants manually. Take into account that even with
+ * a single expression it could potentially contains
+ * many constants, we need only the last one, to find
+ * out where the current expression actually ends.
+ */
+ Const *lastConst;
+ List *constants = NIL;
+ find_const_walker(expr, &constants);
+ lastConst = (Const *) llast(constants);
+
+ RecordConstLocation(jstate, lastConst->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+
+ if (currentExprIdx == const_merge_threshold -1)
+ {
+ // The next expression will be eligible for merging check.
+ // For it to happen correctly remember the number of
+ // constants in the previous expression.
+ List *constants = NIL;
+ find_const_walker(expr, &constants);
+ lastExprLength = constants->length;
+ }
+ }
+ break;
+
+ case T_Param:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+ Param *p = (Param *) expr;
+
+ if (!equal(expr, firstExpr) && IsA(expr, Param) &&
+ currentExprIdx >= const_merge_threshold - 1)
+ {
+
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == const_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > 0);
+ jstate->clocations[jstate->clocations_count - 1].merged = true;
+ currentExprIdx++;
+ }
+ else
+ RecordConstLocation(jstate, p->location, true);
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+
+ /*
+ * To allow merging of parameters as well in
+ * generate_normalized_query, remember it as a constant.
+ */
+ RecordConstLocation(jstate, p->location, false);
+ currentExprIdx++;
+ }
+ break;
+
+ default:
+ foreach(temp, (List *) node)
+ {
+ JumbleExpr(jstate, (Node *) lfirst(temp));
+ }
+ break;
+ }
+
+ return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +649,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +838,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +1091,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +1110,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 1f4d062bab..ea9f6d8ca1 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,8 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +27,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} LocationLen;
/*
@@ -39,7 +43,10 @@ typedef struct JumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
LocationLen *clocations;
/* Allocated length of clocations array */
@@ -62,7 +69,7 @@ typedef enum
/* GUC parameters */
extern int compute_query_id;
-
+extern int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.26.3
On Wed, Jun 16, 2021 at 04:02:12PM +0200, Dmitry Dolgov wrote:
I've prepared a new rebased version to deal with the new way of
computing query id, but as always there is one tricky part. From what I
understand, now an external module can provide custom implementation for
query id computation algorithm. It seems natural to think this machinery
could be used instead of the patch in this thread, i.e. one could create a
custom logic that will enable constants collapsing as needed, so that
same queries with different number of constants in an array will be
hashed into the same record.

But there is a limitation in how such queries will be normalized
afterwards — to reduce the level of surprise it's necessary to display the
fact that a certain query in fact had more constants than are shown in the
pgss record. Ideally LocationLen needs to carry some bits of information
on what exactly could be skipped, and generate_normalized_query needs to
understand that, both are not reachable for an external module with
custom query id logic (without replicating significant part of the
existing code). Hence, a new version of the patch.

Forgot to mention a couple of people who already reviewed the patch.
And now for something completely different, here is a new patch version.
It contains a small fix for one problem we've found during testing (one
code path was making incorrect assumptions about find_const_walker results).
Attachments:
v5-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 8025228206518ce0db0562174932da9b9ab63c78 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Thu, 10 Jun 2021 13:15:35 +0200
Subject: [PATCH v5] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts (or any expression that could be reduced to a
Const) contribute nothing to the jumble hash if they're part of a series
and at a position beyond the specified threshold. Do the same for
similar queries with VALUES as well.
Reviewed-by: Zhihong Yu, Sergey Dudoladov
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 845 +++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 42 +-
.../sql/pg_stat_statements.sql | 170 ++++
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 287 +++++-
src/include/utils/queryjumble.h | 11 +-
6 files changed, 1353 insertions(+), 15 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index b52d187722..140b227fb4 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10
SELECT * FROM test ORDER BY a | 1 | 12
SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4
- SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8
+ SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8
SELECT pg_stat_statements_reset() | 1 | 1
SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0
UPDATE test SET b = $1 WHERE a = $2 | 6 | 6
@@ -1077,4 +1077,847 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- VALUES queries
+-- Normal
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+(3 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+(3 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+(4 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+(6 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+(7 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+(8 rows)
+
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+ column1 | column2
+---------+---------
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8)) q | 1
+ SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+(7 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+(8 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+(4 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+(6 rows)
+
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+ column1
+---------
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+(9 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (($1+$2), ($3+$4), ($5+$6), ($7+$8), (...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs(...)) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------------------------------------+-------
+ PREPARE query AS +| 1
+ SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)) |
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+DEALLOCATE query;
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 07fe0e7cda..dbfb92fec1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -54,7 +54,9 @@
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/planner.h"
+#include "optimizer/optimizer.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/scanner.h"
@@ -273,6 +275,7 @@ static const struct config_enum_entry track_options[] =
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
+static int pgss_merge_threshold; /* minimum number of consts for merge */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_track_planning; /* whether to track planning duration */
static bool pgss_save; /* whether to save stats across shutdown */
@@ -437,6 +440,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.merge_threshold",
+ "After this number of duplicate constants start to merge them.",
+ NULL,
+ &pgss_merge_threshold,
+ 5,
+ 1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
@@ -2601,6 +2617,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2639,12 +2656,27 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non-merged constant appears */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index dffd2c8c18..ecf8b76339 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,174 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+SET pg_stat_statements.merge_threshold = 5;
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test prepared statement
+SELECT pg_stat_statements_reset();
+
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- VALUES queries
+
+-- Normal
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q;
+SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the treshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q;
+SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Const evaluation
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Param evaluation, doesn't work yet
+SELECT pg_stat_statements_reset();
+PREPARE query AS
+SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10));
+EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+DEALLOCATE query;
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
DROP EXTENSION pg_stat_statements;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index d2ce4a8450..afaf8fd2e4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3570,6 +3570,19 @@ static struct config_int ConfigureNamesInt[] =
check_client_connection_check_interval, NULL, NULL
},
+	{
+		{"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Sets the minimal number of constants in an array"
+						 " after which they will be merged"),
+			gettext_noop("Computing query id for an array of constants"
+						 " will produce the same id for all arrays with length"
+						 " larger than this value."),
+		},
+		&const_merge_threshold,
+		5, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index 9f2cd1f127..935196ac0b 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal number of constants in an array after which they are merged */
+int const_merge_threshold = 5;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static bool JumbleExprList(JumbleState *jstate, Node *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -297,7 +301,7 @@ JumbleRangeTable(JumbleState *jstate, List *rtable)
JumbleExpr(jstate, (Node *) rte->tablefunc);
break;
case RTE_VALUES:
- JumbleExpr(jstate, (Node *) rte->values_lists);
+ JumbleExprList(jstate, (Node *) rte->values_lists);
break;
case RTE_CTE:
@@ -341,6 +345,274 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * find_const_walker
+ *		Tree walker gathering every Const node of an expression tree.
+ *
+ * Nodes are appended to *constants; the caller supplies an empty list.
+ */
+static bool
+find_const_walker(Node *node, List **constants)
+{
+	if (node != NULL)
+	{
+		if (!IsA(node, Const))
+			return expression_tree_walker(node, find_const_walker,
+										  (void *) constants);
+
+		*constants = lappend(*constants, (Const *) node);
+	}
+
+	return false;
+}
+
+/*
+ * MarkLastLocationsMerged
+ *		Flag the 'count' most recently recorded locations as merged.
+ */
+static void
+MarkLastLocationsMerged(JumbleState *jstate, int count)
+{
+	Assert(jstate->clocations_count > count - 1);
+
+	for (int i = 1; i <= count; i++)
+		jstate->clocations[jstate->clocations_count - i].merged = true;
+}
+
+/*
+ * RecordLastConstMerged
+ *		Record the location of the last Const found in 'expr' as merged.
+ *
+ * eval_const_expressions does not provide real Consts with a valid location,
+ * which generate_normalized_query needs, so the Consts are extracted from the
+ * original expression instead.  Only the last one matters: it tells where
+ * the expression actually ends.
+ *
+ * An expression may fold to a Const without containing any Const node.  In
+ * that case there is no location to record, so nothing is done rather than
+ * dereferencing an empty list.
+ */
+static void
+RecordLastConstMerged(JumbleState *jstate, Node *expr)
+{
+	List	   *constants = NIL;
+
+	find_const_walker(expr, &constants);
+
+	if (constants != NIL)
+	{
+		Const	   *lastConst = (Const *) llast(constants);
+
+		RecordConstLocation(jstate, lastConst->location, true);
+		list_free(constants);
+	}
+}
+
+/*
+ * JumbleExprList
+ *		Jumble a list of expressions, merging long runs of constants.
+ *
+ * 'node' is a List of expressions, or a List of Lists as handed over for
+ * RTE_VALUES.  Elements are jumbled one by one via JumbleExpr until
+ * const_merge_threshold of them have been processed; from there on, elements
+ * that still qualify contribute nothing to the jumble and only get a
+ * location recorded with the "merged" flag set.
+ *
+ * What qualifies depends on the first element, after constant folding:
+ *	- List: every member of the row folds to a Const;
+ *	- Const: the element folds to a Const;
+ *	- Param: the element is a Param not equal to the first element;
+ *	- anything else: no merging, plain JumbleExpr for each element.
+ *
+ * Returns true if at least one element was treated as merged.
+ */
+static bool
+JumbleExprList(JumbleState *jstate, Node *node)
+{
+	ListCell   *temp;
+	Node	   *firstExpr;
+	bool		merged = false;
+	bool		allConst = true;
+	int			currentExprIdx = 0;
+	int			lastExprLength = 0;
+	int			mergeFrom = const_merge_threshold - 1;
+
+	if (node == NULL)
+		return merged;
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	Assert(IsA(node, List));
+	firstExpr = eval_const_expressions(NULL,
+									   (Node *) lfirst(list_head((List *) node)));
+
+	/*
+	 * We always emit the node's NodeTag, then any additional fields that are
+	 * considered significant, and then we recurse to any child nodes.
+	 */
+	APP_JUMB(node->type);
+
+	/*
+	 * If the first expression is a constant or a list of constants, try to
+	 * merge the following if they're constants as well. Otherwise do
+	 * JumbleExpr as usual.
+	 */
+	switch (nodeTag(firstExpr))
+	{
+		case T_List:
+			foreach(temp, (List *) node)
+			{
+				List	   *expr = (List *) lfirst(temp);
+				ListCell   *lc;
+
+				/*
+				 * Note that allConst is not reset between rows: once a row
+				 * with a non-constant member is seen, no later row is merged.
+				 */
+				foreach(lc, expr)
+				{
+					Node	   *subExpr;
+
+					subExpr = eval_const_expressions(NULL, (Node *) lfirst(lc));
+					if (!IsA(subExpr, Const))
+					{
+						allConst = false;
+						break;
+					}
+				}
+
+				if (allConst && currentExprIdx >= mergeFrom)
+				{
+					merged = true;
+
+					if (currentExprIdx == mergeFrom)
+					{
+						/*
+						 * This hash is going to accumulate the following
+						 * merged statements.  JumbleExpr records the row's
+						 * constants; mark them as merged, they will be the
+						 * first merged but still present in the statement
+						 * query.  The number of locations flagged is the
+						 * length of the previously jumbled row.
+						 */
+						JumbleExpr(jstate, (Node *) expr);
+						MarkLastLocationsMerged(jstate, lastExprLength);
+						currentExprIdx++;
+					}
+					else
+					{
+						/* Not jumbled at all, only locations are kept */
+						foreach(lc, expr)
+						{
+							RecordLastConstMerged(jstate, (Node *) lfirst(lc));
+						}
+					}
+
+					continue;
+				}
+
+				JumbleExpr(jstate, (Node *) expr);
+				currentExprIdx++;
+				lastExprLength = list_length(expr);
+			}
+			break;
+
+		case T_Const:
+			foreach(temp, (List *) node)
+			{
+				Node	   *expr = (Node *) lfirst(temp);
+				Node	   *evalExpr = eval_const_expressions(NULL, expr);
+
+				if (IsA(evalExpr, Const) && currentExprIdx >= mergeFrom)
+				{
+					merged = true;
+
+					if (currentExprIdx == mergeFrom)
+					{
+						/*
+						 * This hash is going to accumulate the following
+						 * merged statements.  JumbleExpr records the
+						 * expression's constants; mark them as merged, they
+						 * will be the first merged but still present in the
+						 * statement query.
+						 */
+						JumbleExpr(jstate, expr);
+						MarkLastLocationsMerged(jstate, lastExprLength);
+						currentExprIdx++;
+					}
+					else
+					{
+						/*
+						 * Even a single expression could contain many
+						 * constants; only the last one is recorded.
+						 */
+						RecordLastConstMerged(jstate, expr);
+					}
+
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+				currentExprIdx++;
+
+				if (currentExprIdx == mergeFrom)
+				{
+					/*
+					 * The next expression will be eligible for merging check.
+					 * For it to happen correctly remember the number of
+					 * constants in the previous expression.
+					 */
+					List	   *constants = NIL;
+
+					find_const_walker(expr, &constants);
+
+					/*
+					 * The expression we work with here could be anything, so
+					 * no constants found is a possible outcome.
+					 */
+					lastExprLength = (constants != NIL) ? list_length(constants) : 1;
+					list_free(constants);
+				}
+			}
+			break;
+
+		case T_Param:
+			foreach(temp, (List *) node)
+			{
+				Node	   *expr = (Node *) lfirst(temp);
+				bool		isParam = IsA(expr, Param);
+
+				if (isParam && !equal(expr, firstExpr) &&
+					currentExprIdx >= mergeFrom)
+				{
+					merged = true;
+
+					if (currentExprIdx == mergeFrom)
+					{
+						/*
+						 * This hash is going to accumulate the following
+						 * merged statements.  Flag the most recently recorded
+						 * location as merged, it will be the first merged but
+						 * still present in the statement query.
+						 */
+						JumbleExpr(jstate, expr);
+						MarkLastLocationsMerged(jstate, 1);
+						currentExprIdx++;
+					}
+					else
+						RecordConstLocation(jstate,
+											((Param *) expr)->location, true);
+
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+
+				/*
+				 * To allow merging of parameters as well in
+				 * generate_normalized_query, remember it as a constant.  Only
+				 * genuine Params carry a Param location; other node types
+				 * must not be read through a Param pointer.
+				 */
+				if (isParam)
+					RecordConstLocation(jstate,
+										((Param *) expr)->location, false);
+				currentExprIdx++;
+			}
+			break;
+
+		default:
+			foreach(temp, (List *) node)
+			{
+				JumbleExpr(jstate, (Node *) lfirst(temp));
+			}
+			break;
+	}
+
+	return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +662,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +851,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +1104,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +1123,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 7af6652f3e..99a671e118 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,8 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +27,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} LocationLen;
/*
@@ -39,7 +43,10 @@ typedef struct JumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
LocationLen *clocations;
/* Allocated length of clocations array */
@@ -62,7 +69,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern int compute_query_id;
-
+extern int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.26.3
On Thu, Sep 30, 2021 at 6:49 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Wed, Jun 16, 2021 at 04:02:12PM +0200, Dmitry Dolgov wrote:
I've prepared a new rebased version to deal with the new way of
computing query id, but as always there is one tricky part. From what I
understand, now an external module can provide custom implementation for
query id computation algorithm. It seems natural to think this
machinery
could be used instead of patch in the thread, i.e. one could create a
custom logic that will enable constants collapsing as needed, so that
same queries with different number of constants in an array will be
hashed into the same record.
But there is a limitation in how such queries will be normalized
afterwards — to reduce the level of surprise it's necessary to display the
fact that a certain query in fact had more constants than are shown in
the pgss record. Ideally LocationLen needs to carry some bits of information
on what exactly could be skipped, and generate_normalized_query needs
to
understand that, both are not reachable for an external module with
custom query id logic (without replicating significant part of the
existing code). Hence, a new version of the patch.
Forgot to mention a couple of people who already reviewed the patch.
And now for something completely different, here is a new patch version.
It contains a small fix for one problem we've found during testing (one
code path was making incorrect assumptions about find_const_walker results).
Hi,
bq. and at position further that specified threshold.
that specified threshold -> than specified threshold
Cheers
On Thu, Sep 30, 2021 at 08:03:16AM -0700, Zhihong Yu wrote:
On Thu, Sep 30, 2021 at 6:49 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Wed, Jun 16, 2021 at 04:02:12PM +0200, Dmitry Dolgov wrote:
I've prepared a new rebased version to deal with the new way of
computing query id, but as always there is one tricky part. From what I
understand, now an external module can provide custom implementation for
query id computation algorithm. It seems natural to think this
machinery
could be used instead of patch in the thread, i.e. one could create a
custom logic that will enable constants collapsing as needed, so that
same queries with different number of constants in an array will be
hashed into the same record.
But there is a limitation in how such queries will be normalized
afterwards — to reduce the level of surprise it's necessary to display the
fact that a certain query in fact had more constants than are shown in
the pgss record. Ideally LocationLen needs to carry some bits of information
on what exactly could be skipped, and generate_normalized_query needs
to
understand that, both are not reachable for an external module with
custom query id logic (without replicating significant part of the
existing code). Hence, a new version of the patch.
Forgot to mention a couple of people who already reviewed the patch.
And now for something completely different, here is a new patch version.
It contains a small fix for one problem we've found during testing (one
path code was incorrectly assuming find_const_walker results).
Hi,
bq. and at position further that specified threshold.
that specified threshold -> than specified threshold
You mean in the patch commit message, nowhere else, right? Yep, my spell
checker didn't catch that, thanks for noticing!
Dmitry Dolgov <9erthalion6@gmail.com> writes:
And now for something completely different, here is a new patch version.
It contains a small fix for one problem we've found during testing (one
path code was incorrectly assuming find_const_walker results).
I've been saying from day one that pushing the query-hashing code into the
core was a bad idea, and I think this patch perfectly illustrates why.
We can debate whether the rules proposed here are good for
pg_stat_statements or not, but it seems inevitable that they will be a
disaster for some other consumers of the query hash. In particular,
dropping external parameters from the hash seems certain to break
something for somebody --- do you really think that a query with two int
parameters is equivalent to one with five float parameters for all
query-identifying purposes?
I can see the merits of allowing different numbers of IN elements
to be considered equivalent for pg_stat_statements, but this patch
seems to go far beyond that basic idea, and I fear the side-effects
will be very bad.
Also, calling eval_const_expressions in the query jumbler is flat
out unacceptable. There is way too much code that could be reached
that way (more or less the entire executor, to start with). I
don't have a lot of faith that it'd never modify the input tree,
either.
regards, tom lane
On 1/5/22 4:02 AM, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:
And now for something completely different, here is a new patch version.
It contains a small fix for one problem we've found during testing (one
path code was incorrectly assuming find_const_walker results).
I've been saying from day one that pushing the query-hashing code into the
core was a bad idea, and I think this patch perfectly illustrates why.
We can debate whether the rules proposed here are good for
pg_stat_statements or not, but it seems inevitable that they will be a
disaster for some other consumers of the query hash. In particular,
dropping external parameters from the hash seems certain to break
something for somebody
+1.
In a couple of extensions I use different logic of query jumbling - hash
value is more stable in some cases than in default implementation. For
example, it should be stable to permutations in 'FROM' section of a query.
And if anyone subtly changes the jumbling logic while the extension is
active, the instance could get huge performance issues.
Let me suggest that the core should at least allow an extension to
detect such interference between extensions. Maybe the hook could be
replaced with a callback to allow an extension to see a queryid with the
underlying generation logic that it expects.
--
regards,
Andrey Lepikhov
Postgres Professional
"Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru> writes:
On 1/5/22 4:02 AM, Tom Lane wrote:
I've been saying from day one that pushing the query-hashing code into the
core was a bad idea, and I think this patch perfectly illustrates why.
+1.
Let me suggest that the core should at least allow an extension to
detect such interference between extensions. Maybe the hook could be
replaced with a callback to allow an extension to see a queryid with the
underlying generation logic that it expects.
I feel like we need to get away from the idea that there is just
one query hash, and somehow let different extensions attach
differently-calculated hashes to a query. I don't have any immediate
ideas about how to do that in a reasonably inexpensive way.
regards, tom lane
On Tue, Jan 04, 2022 at 06:02:43PM -0500, Tom Lane wrote:
We can debate whether the rules proposed here are good for
pg_stat_statements or not, but it seems inevitable that they will be a
disaster for some other consumers of the query hash.
Hm, which consumers do you mean here, potential extensions? Doesn't the
ability to use an external module to compute queryid make this situation
possible anyway?
do you really think that a query with two int
parameters is equivalent to one with five float parameters for all
query-identifying purposes?
Nope, and it will be hard to figure this out no matter which approach
we're talking about, because it mostly depends on the context and type
of queries I guess. Instead, such functionality should allow some
reasonable configuration. To be clear, the use case I have in mind here
is not four or five, but rather a couple of hundred constants, where the
chances that the whole construction was generated automatically by an ORM
are higher than normal.
I can see the merits of allowing different numbers of IN elements
to be considered equivalent for pg_stat_statements, but this patch
seems to go far beyond that basic idea, and I fear the side-effects
will be very bad.
Not sure why it goes far beyond, but then there were two approaches
under consideration, as I've stated in the first message. I already
don't remember all the details, but another one was revolving around
doing similar things in a more limited fashion in transformAExprIn. The
problem would be then to carry the information, necessary to represent
the act of "merging" some number of queryids together. Any thoughts
here?
The idea of keeping the original queryid untouched and adding another type
of id instead sounds interesting, but it will add too much overhead for
a quite small use case I guess.
On Wed, Jan 05, 2022 at 10:11:11PM +0100, Dmitry Dolgov wrote:
On Tue, Jan 04, 2022 at 06:02:43PM -0500, Tom Lane wrote:
We can debate whether the rules proposed here are good for
pg_stat_statements or not, but it seems inevitable that they will be a
disaster for some other consumers of the query hash.
Hm, which consumers do you mean here, potential extension? Isn't the
ability to use an external module to compute queryid make this situation
possible anyway?
do you really think that a query with two int
parameters is equivalent to one with five float parameters for all
query-identifying purposes?
Nope, and it will be hard to figure this out no matter which approach
we're talking about, because it mostly depends on the context and type
of queries I guess. Instead, such functionality should allow some
reasonable configuration. To be clear, the use case I have in mind here
is not four or five, but rather a couple of hundreds constants where
chances that the whole construction was generated automatically by ORM
is higher than normal.
I can see the merits of allowing different numbers of IN elements
to be considered equivalent for pg_stat_statements, but this patch
seems to go far beyond that basic idea, and I fear the side-effects
will be very bad.
Not sure why it goes far beyond, but then there were two approaches
under consideration, as I've stated in the first message. I already
don't remember all the details, but another one was evolving around
doing similar things in a more limited fashion in transformAExprIn. The
problem would be then to carry the information, necessary to represent
the act of "merging" some number of queryids together. Any thoughts
here?
The idea of keeping the original queryid untouched and add another type
of id instead sounds interesting, but it will add too much overhead for
a quite small use case I guess.
```
Thu, 10 Mar 2022
New status: Waiting on Author
```
This seems incorrect, as the only feedback I've got was "this is a bad
idea", and no reaction on follow-up questions.
Dmitry Dolgov <9erthalion6@gmail.com> writes:
New status: Waiting on Author
This seems incorrect, as the only feedback I've got was "this is a bad
idea", and no reaction on follow-up questions.
I changed the status because it seems to me there is no chance of
this being committed as-is.
1. I think an absolute prerequisite before we could even consider
changing the query jumbler rules this much is to do the work that was
put off when the jumbler was moved into core: that is, provide some
honest support for multiple query-ID generation methods being used at
the same time. Even if you successfully make a case for
pg_stat_statements to act this way, other consumers of query IDs
aren't going to be happy with it.
2. You haven't made a case for it. The original complaint was
about different lengths of IN lists not being treated as equivalent,
but this patch has decided to do I'm-not-even-sure-quite-what
about treating different Params as equivalent. Plus you're trying
to invoke eval_const_expressions in the jumbler; that is absolutely
Not OK, for both safety and semantic reasons.
If you backed off to just treating ArrayExprs containing different
numbers of Consts as equivalent, maybe that'd be something we could
adopt without fixing point 1. I don't think anything that fuzzes the
treatment of Params can get away with that, though.
regards, tom lane
On Thu, Mar 10, 2022 at 12:12 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
This seems incorrect, as the only feedback I've got was "this is a bad
idea", and no reaction on follow-up questions.
I changed the status because it seems to me there is no chance of
this being committed as-is.
1. I think an absolute prerequisite before we could even consider
changing the query jumbler rules this much is to do the work that was
put off when the jumbler was moved into core: that is, provide some
honest support for multiple query-ID generation methods being used at
the same time. Even if you successfully make a case for
pg_stat_statements to act this way, other consumers of query IDs
aren't going to be happy with it.
FWIW, I don't find this convincing at all. Query jumbling is already
somewhat expensive, and it seems unlikely that the same person is
going to want to jumble queries in one way for pg_stat_statements and
another way for pg_stat_broccoli or whatever their other extension is.
Putting a lot of engineering work into something with such a marginal
use case seems not worthwhile to me - and also likely futile, because
I don't see how it could realistically be made nearly as cheap as a
single jumble.
2. You haven't made a case for it. The original complaint was
about different lengths of IN lists not being treated as equivalent,
but this patch has decided to do I'm-not-even-sure-quite-what
about treating different Params as equivalent. Plus you're trying
to invoke eval_const_expressions in the jumbler; that is absolutely
Not OK, for both safety and semantic reasons.
I think there are two separate points here, one about patch quality
and the other about whether the basic idea is good. I think the basic
idea is good. I do not contend that collapsing IN-lists of arbitrary
length is what everyone wants in all cases, but it seems entirely
reasonable to me to think that it is what some people want. So I would
say just make it a parameter and let people configure whichever
behavior they want. My bet is 95% of users would prefer to have it on,
but even if that's wildly wrong, having it as an optional behavior
hurts nobody. Let it be off by default and let those who want it flip
the toggle. On the code quality issue, I haven't read the patch but
your concerns sound well-founded to me from reading what you wrote.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Thu, Mar 10, 2022 at 12:32:08PM -0500, Robert Haas wrote:
On Thu, Mar 10, 2022 at 12:12 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
2. You haven't made a case for it. The original complaint was
about different lengths of IN lists not being treated as equivalent,
but this patch has decided to do I'm-not-even-sure-quite-what
about treating different Params as equivalent. Plus you're trying
to invoke eval_const_expressions in the jumbler; that is absolutely
Not OK, for both safety and semantic reasons.
I think there are two separate points here, one about patch quality
and the other about whether the basic idea is good. I think the basic
idea is good. I do not contend that collapsing IN-lists of arbitrary
length is what everyone wants in all cases, but it seems entirely
reasonable to me to think that it is what some people want. So I would
say just make it a parameter and let people configure whichever
behavior they want. My bet is 95% of users would prefer to have it on,
but even if that's wildly wrong, having it as an optional behavior
hurts nobody. Let it be off by default and let those who want it flip
the toggle. On the code quality issue, I haven't read the patch but
your concerns sound well-founded to me from reading what you wrote.
I have the same understanding, there is a toggle in the patch exactly
for this purpose.
To give a bit more context, the whole development was ORM-driven rather
than pulled out of thin air -- people were complaining about huge
generated queries that could be barely displayed in monitoring, I was
trying to address it via collapsing the list where it was happening. In
other words, the "I'm-not-even-sure-quite-what" part may indeed be too
extensive, but it was triggered by real-world issues.
Of course, I may not have gotten the implementation quite right, e.g. I wasn't
aware of the dangers of using eval_const_expressions. But that's what the
CF item and the corresponding discussion is for, I guess. Let me see
what I could do to improve it.
On Thu, Mar 10, 2022 at 12:11:59PM -0500, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:
New status: Waiting on Author
This seems incorrect, as the only feedback I've got was "this is a bad
idea", and no reaction on follow-up questions.
I changed the status because it seems to me there is no chance of
this being committed as-is.
1. I think an absolute prerequisite before we could even consider
changing the query jumbler rules this much is to do the work that was
put off when the jumbler was moved into core: that is, provide some
honest support for multiple query-ID generation methods being used at
the same time. Even if you successfully make a case for
pg_stat_statements to act this way, other consumers of query IDs
aren't going to be happy with it.
2. You haven't made a case for it. The original complaint was
about different lengths of IN lists not being treated as equivalent,
but this patch has decided to do I'm-not-even-sure-quite-what
about treating different Params as equivalent. Plus you're trying
to invoke eval_const_expressions in the jumbler; that is absolutely
Not OK, for both safety and semantic reasons.
If you backed off to just treating ArrayExprs containing different
numbers of Consts as equivalent, maybe that'd be something we could
adopt without fixing point 1. I don't think anything that fuzzes the
treatment of Params can get away with that, though.
Here is the limited version of list collapsing functionality, which
doesn't utilize eval_const_expressions and ignores most of the stuff
except ArrayExprs. Any thoughts/more suggestions?
Attachments:
v6-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) Download
From ce9f2ed2466d28dbbef3310383d84eba58e5791b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 12 Mar 2022 14:42:02 +0100
Subject: [PATCH v6] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. Make Consts contribute nothing to the jumble hash if they're
part of a series and at position further that specified threshold.
Reviewed-by: Zhihong Yu, Sergey Dudoladov
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 26 +-
.../sql/pg_stat_statements.sql | 107 +++++
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 236 +++++++++-
src/include/utils/queryjumble.h | 10 +-
6 files changed, 791 insertions(+), 13 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index e0abe34bb6..e05a6f565a 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1077,4 +1077,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 082bfa8f77..b872490133 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2610,6 +2610,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merge = false;
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2648,12 +2649,27 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ if (merge)
+ merge = false;
+ }
+ else if (!merge)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Merge until a non merged constant appear */
+ merge = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index dffd2c8c18..55ba3b35e7 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f505413a7f..45a6d593ce 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3600,6 +3600,19 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a67487e5fe..563de94f9f 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal numer of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static bool JumbleExprList(JumbleState *jstate, Node *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -341,6 +345,225 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * find_const_walker
+ * Locate all the Const nodes in an expression tree.
+ *
+ * Caller must provide an empty list where constants will be collected.
+ */
+static bool
+find_const_walker(Node *node, List **constants)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, Const))
+ {
+ *constants = lappend(*constants, (Const *) node);
+ return false;
+ }
+
+ return expression_tree_walker(node, find_const_walker, (void *) constants);
+}
+
+static bool
+JumbleExprList(JumbleState *jstate, Node *node)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool merged = false;
+ bool allConst = true;
+ int currentExprIdx;
+ int lastExprLength = 0;
+
+ if (node == NULL)
+ return merged;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, node);
+ return merged;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ Assert(IsA(node, List));
+ firstExpr = (Node *) lfirst(list_head((List *) node));
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(node->type);
+
+ /*
+ * If the first expression is a constant or a list of constants, try to
+ * merge the following if they're constants as well. Otherwise do
+ * JumbleExpr as usual.
+ */
+ switch (nodeTag(firstExpr))
+ {
+ case T_List:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ List *expr = (List *) lfirst(temp);
+ ListCell *lc;
+
+ foreach(lc, expr)
+ {
+ Node * subExpr = (Node *) lfirst(lc);
+
+ if (!IsA(subExpr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst && currentExprIdx >= const_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == const_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, (Node *) expr);
+
+ /*
+ * An expr consisting of constants is already found,
+ * JumbleExpr must record it. Mark all the constants as
+ * merged, they will be the first merged but still
+ * present in the statement query.
+ */
+ Assert(jstate->clocations_count > lastExprLength - 1);
+ for (int i = 1; i < lastExprLength + 1; i++)
+ {
+ LocationLen *loc;
+ loc = &jstate->clocations[jstate->clocations_count - i];
+ loc->merged = true;
+ }
+ currentExprIdx++;
+ }
+ else
+ foreach(lc, expr)
+ {
+ /*
+ * Find the last constant to provide information
+ * for generate_normalized_query where the current
+ * expression actually ends.
+ */
+ Const *lastConst;
+ List *constants = NIL;
+ find_const_walker((Node *) lfirst(lc), &constants);
+
+ /* We should be able to find some constants, as
+ * they were discovered before. */
+ Assert(constants != NIL);
+ lastConst = (Const *) llast(constants);
+ RecordConstLocation(jstate, lastConst->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, (Node *) expr);
+ currentExprIdx++;
+ lastExprLength = expr->length;
+ }
+ break;
+
+ case T_Const:
+ currentExprIdx = 0;
+
+ foreach(temp, (List *) node)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (IsA(expr, Const) && currentExprIdx >= const_merge_threshold - 1)
+ {
+ merged = true;
+
+ /*
+ * This hash is going to accumulate the following merged
+ * statements
+ */
+ if (currentExprIdx == const_merge_threshold - 1)
+ {
+ JumbleExpr(jstate, expr);
+
+ /*
+ * A const expr is already found, so JumbleExpr must
+ * record it. Mark it as merged, it will be the first
+ * merged but still present in the statement query.
+ */
+ Assert(jstate->clocations_count > lastExprLength - 1);
+ for (int i = 1; i < lastExprLength + 1; i++)
+ {
+ LocationLen *loc;
+ loc = &jstate->clocations[jstate->clocations_count - i];
+ loc->merged = true;
+ }
+ currentExprIdx++;
+ }
+ else
+ {
+ /*
+ * Find the last constant to provide information
+ * for generate_normalized_query where the current
+ * expression actually ends.
+ */
+ Const *lastConst;
+ List *constants = NIL;
+ find_const_walker(expr, &constants);
+
+ /* We should be able to find some constants, as
+ * they were discovered before. */
+ Assert(constants != NIL);
+ lastConst = (Const *) llast(constants);
+ RecordConstLocation(jstate, lastConst->location, true);
+ }
+
+ continue;
+ }
+
+ JumbleExpr(jstate, expr);
+ currentExprIdx++;
+
+ if (currentExprIdx == const_merge_threshold -1)
+ {
+ /*
+ * The next expression will be eligible for merging check.
+ * For it to happen correctly remember the number of
+ * constants in the previous expression.
+ */
+ List *constants = NIL;
+ find_const_walker(expr, &constants);
+
+ /* The expression we work with here could be anything, so
+ * no constants found is a possible outcome. */
+ if (constants != NIL)
+ lastExprLength = constants->length;
+ else
+ lastExprLength = 1;
+ }
+ }
+ break;
+
+ default:
+ JumbleExpr(jstate, node);
+ break;
+ }
+
+ return merged;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +613,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +802,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +1055,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
+ * Record location of constant or a parameter within query string of query tree
* that is currently being walked.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +1074,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index a4c277269e..157e3f777a 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +26,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ duplicate */
} LocationLen;
/*
@@ -39,7 +42,10 @@ typedef struct JumbleState
/* Number of bytes used in jumble[] */
Size jumble_len;
- /* Array of locations of constants that should be removed */
+ /*
+ * Array of locations of constants that should be removed, or parameters
+ * that are already replaced, but could be also processed to be merged
+ */
LocationLen *clocations;
/* Allocated length of clocations array */
@@ -62,7 +68,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern int compute_query_id;
-
+extern int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
On Sat, Mar 12, 2022 at 9:11 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Here is the limited version of list collapsing functionality, which
doesn't utilize eval_const_expressions and ignores most of the stuff
except ArrayExprs. Any thoughts/more suggestions?
The proposed commit message says this commit intends to "Make Consts
contribute nothing to the jumble hash if they're part of a series and
at position further that specified threshold." I'm not sure whether
that's what the patch actually implements because I can't immediately
understand the new logic you've added, but I think if we did what that
sentence said then, supposing the threshold is set to 1, it would
result in producing the same hash for "x in (1,2)" that we do for "x
in (1,3)" but a different hash for "x in (2,3)" which does not sound
like what we want. What I would have thought we'd do is: if the list
is all constants and long enough to satisfy the threshold then nothing
in the list gets jumbled.
I'm a little surprised that there's not more context-awareness in this
code. It seems that it applies to every ArrayExpr found in the query,
which I think would extend to cases beyond something = IN(whatever).
In particular, any use of ARRAY[] in the query would be impacted. Now,
the comments seem to imply that's pretty intentional, but from the
user's point of view, WHERE x in (1,3) and x = any(array[1,3]) are two
different things. If anything like this is to be adopted, we certainly
need to be precise about exactly what it is doing and which cases are
covered. I thought of looking at the documentation to see whether
you'd tried to clarify this there, and found that you hadn't written
any.
In short, I think this patch is not really very close to being in
committable shape even if nobody were objecting to the concept.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Mon, Mar 14, 2022 at 10:17:57AM -0400, Robert Haas wrote:
On Sat, Mar 12, 2022 at 9:11 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Here is the limited version of list collapsing functionality, which
doesn't utilize eval_const_expressions and ignores most of the stuff
except ArrayExprs. Any thoughts/more suggestions?
The proposed commit message says this commit intends to "Make Consts
contribute nothing to the jumble hash if they're part of a series and
at position further that specified threshold." I'm not sure whether
that's what the patch actually implements because I can't immediately
understand the new logic you've added, but I think if we did what that
sentence said then, supposing the threshold is set to 1, it would
result in producing the same hash for "x in (1,2)" that we do for "x
in (1,3)" but a different hash for "x in (2,3)" which does not sound
like what we want. What I would have thought we'd do is: if the list
is all constants and long enough to satisfy the threshold then nothing
in the list gets jumbled.
Well, yeah, the commit message is somewhat clumsy in this regard. It
works almost in the way you've described, except if the list is all
constants and long enough to satisfy the threshold then *first N
elements (where N == threshold) will be jumbled -- to leave at least
some traces of it in pgss.
I'm a little surprised that there's not more context-awareness in this
code. It seems that it applies to every ArrayExpr found in the query,
which I think would extend to cases beyond something = IN(whatever).
In particular, any use of ARRAY[] in the query would be impacted. Now,
the comments seem to imply that's pretty intentional, but from the
user's point of view, WHERE x in (1,3) and x = any(array[1,3]) are two
different things. If anything like this is to be adopted, we certainly
need to be precise about exactly what it is doing and which cases are
covered.
I'm not sure if I follow the last point. WHERE x in (1,3) and x =
any(array[1,3]) are two different things for sure, but in which way are
they going to be mixed together because of this change? My goal was to
make only the following transformation, without leaving any uncertainty:
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (1, 2, ...)
WHERE x = any(array[1, 2, 3, 4, 5]) -> WHERE x = any(array[1, 2, ...])
I thought of looking at the documentation to see whether you'd tried
to clarify this there, and found that you hadn't written any.
In short, I think this patch is not really very close to being in
committable shape even if nobody were objecting to the concept.
Sure, I'll add documentation. To be honest I'm not targeting PG15 with
this, just want to make some progress. Thanks for the feedback, I'm glad
to see it coming!
On Mon, Mar 14, 2022 at 10:57 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Well, yeah, the commit message is somewhat clumsy in this regard. It
works almost in the way you've described, except if the list is all
constants and long enough to satisfy the threshold then *first N
elements (where N == threshold) will be jumbled -- to leave at least
some traces of it in pgss.
But that seems to me to be a thing we would not want. Why do you think
otherwise?
I'm not sure if I follow the last point. WHERE x in (1,3) and x =
any(array[1,3]) are two different things for sure, but in which way are
they going to be mixed together because of this change? My goal was to
make only the following transformation, without leaving any uncertainty:
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (1, 2, ...)
WHERE x = any(array[1, 2, 3, 4, 5]) -> WHERE x = any(array[1, 2, ...])
I understand. I think it might be OK to transform both of those
things, but I don't think it's very clear either from the comments or
the nonexistent documentation that both of those cases are affected --
and I think that needs to be clear. Not sure exactly how to do that,
just saying that we can't add behavior unless it will be clear to
users what the behavior is.
Sure, I'll add documentation. To be honest I'm not targeting PG15 with
this, just want to make some progress.
wfm!
--
Robert Haas
EDB: http://www.enterprisedb.com
On Mon, Mar 14, 2022 at 11:02:16AM -0400, Robert Haas wrote:
On Mon, Mar 14, 2022 at 10:57 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Well, yeah, the commit message is somewhat clumsy in this regard. It
works almost in the way you've described, except if the list is all
constants and long enough to satisfy the threshold then *first N
elements (where N == threshold) will be jumbled -- to leave at least
some traces of it in pgss.
But that seems to me to be a thing we would not want. Why do you think
otherwise?
Hm. Well, if the whole list would be not jumbled, the transformation
would look like this, right?
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (...)
Leaving some number of original elements in place gives some clue for
the reader about at least what type of data the array has contained.
Which hopefully makes it a bit easier to identify even in the collapsed
form:
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (1, 2, ...)
I'm not sure if I follow the last point. WHERE x in (1,3) and x =
any(array[1,3]) are two different things for sure, but in which way are
they going to be mixed together because of this change? My goal was to
make only the following transformation, without leaving any uncertainty:
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (1, 2, ...)
WHERE x = any(array[1, 2, 3, 4, 5]) -> WHERE x = any(array[1, 2, ...])
I understand. I think it might be OK to transform both of those
things, but I don't think it's very clear either from the comments or
the nonexistent documentation that both of those cases are affected --
and I think that needs to be clear. Not sure exactly how to do that,
just saying that we can't add behavior unless it will be clear to
users what the behavior is.
Yep, got it.
Robert Haas <robertmhaas@gmail.com> writes:
On Mon, Mar 14, 2022 at 10:57 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
I'm not sure if I follow the last point. WHERE x in (1,3) and x =
any(array[1,3]) are two different things for sure, but in which way are
they going to be mixed together because of this change? My goal was to
make only the following transformation, without leaving any uncertainty:
WHERE x in (1, 2, 3, 4, 5) -> WHERE x in (1, 2, ...)
WHERE x = any(array[1, 2, 3, 4, 5]) -> WHERE x = any(array[1, 2, ...])
I understand. I think it might be OK to transform both of those
things, but I don't think it's very clear either from the comments or
the nonexistent documentation that both of those cases are affected --
and I think that needs to be clear.
We've transformed IN(...) to ANY(ARRAY[...]) at the parser stage for a
long time, and this has been visible to users of either EXPLAIN or
pg_stat_statements for the same length of time. I doubt people are
going to find that surprising. Even if they do, it's not the query
jumbler's fault.
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
regards, tom lane
On Mon, Mar 14, 2022 at 11:23:17AM -0400, Tom Lane wrote:
Robert Haas <robertmhaas@gmail.com> writes:
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
That's a bit surprising for me, I haven't even thought that folks could
think this is an odd behaviour. As I've mentioned above, the original
idea was to give some clues about what was inside the collapsed array,
but if everyone finds it unnecessary I can of course change it.
Dmitry Dolgov <9erthalion6@gmail.com> writes:
On Mon, Mar 14, 2022 at 11:23:17AM -0400, Tom Lane wrote:
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
That's a bit surprising for me, I haven't even thought that folks could
think this is an odd behaviour. As I've mentioned above, the original
idea was to give some clues about what was inside the collapsed array,
but if everyone finds it unnecessary I can of course change it.
But if what we're doing is skipping over an all-Consts list, then the
individual Consts would be elided from the pg_stat_statements entry
anyway, no? All that would remain is information about how many such
Consts there were, which is exactly the information you want to drop.
regards, tom lane
On Mon, Mar 14, 2022 at 11:38:23AM -0400, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:
On Mon, Mar 14, 2022 at 11:23:17AM -0400, Tom Lane wrote:
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
That's a bit surprising for me, I haven't even thought that folks could
think this is an odd behaviour. As I've mentioned above, the original
idea was to give some clues about what was inside the collapsed array,
but if everyone finds it unnecessary I can of course change it.
But if what we're doing is skipping over an all-Consts list, then the
individual Consts would be elided from the pg_stat_statements entry
anyway, no? All that would remain is information about how many such
Consts there were, which is exactly the information you want to drop.
Hm, yes, you're right. I guess I was thinking about this more like about
shortening some text with ellipsis, but indeed no actual Consts will end
up in the result anyway. Thanks for clarification, will modify the
patch!
On Mon, Mar 14, 2022 at 04:51:50PM +0100, Dmitry Dolgov wrote:
On Mon, Mar 14, 2022 at 11:38:23AM -0400, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:
On Mon, Mar 14, 2022 at 11:23:17AM -0400, Tom Lane wrote:
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
That's a bit surprising for me, I haven't even thought that folks could
think this is an odd behaviour. As I've mentioned above, the original
idea was to give some clues about what was inside the collapsed array,
but if everyone finds it unnecessary I can of course change it.
But if what we're doing is skipping over an all-Consts list, then the
individual Consts would be elided from the pg_stat_statements entry
anyway, no? All that would remain is information about how many such
Consts there were, which is exactly the information you want to drop.
Hm, yes, you're right. I guess I was thinking about this more like about
shortening some text with ellipsis, but indeed no actual Consts will end
up in the result anyway. Thanks for clarification, will modify the
patch!
Here is another iteration. Now the patch doesn't leave any trailing
Consts in the normalized query, and contains more documentation. I hope
it's getting better.
Attachments:
v7-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) Download
From 8226635c221a659097deb6ea64626a587296ea60 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 12 Mar 2022 14:42:02 +0100
Subject: [PATCH v7] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 105 ++++-
src/include/utils/queryjumble.h | 5 +-
8 files changed, 711 insertions(+), 18 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index e0abe34bb6..de3970e462 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1077,4 +1077,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 9e525a6ad3..a92e7aea15 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2581,6 +2581,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2604,7 +2607,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2619,12 +2621,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The first merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appears */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index dffd2c8c18..55ba3b35e7 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 05df48131d..bca118366e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8013,6 +8013,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_threshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending on the size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and its length is at least <varname>const_merge_threshold</varname>,
+ then array elements will contribute nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index bc9d5bdbe3..7507aa08ce 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -425,10 +425,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with at least <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b86137dc38..2765acd891 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3608,6 +3608,19 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a67487e5fe..063b4be725 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal number of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -341,6 +345,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jumble a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +478,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +667,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +920,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. The merged argument signals that the constant does
+ * not contribute to the jumble hash, and any reader of the constants array may
+ * want to use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +941,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index c670662db2..5bb994a6ac 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +26,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -63,7 +66,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern int compute_query_id;
-
+extern int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
On Sat, Mar 26, 2022 at 06:40:35PM +0100, Dmitry Dolgov wrote:
On Mon, Mar 14, 2022 at 04:51:50PM +0100, Dmitry Dolgov wrote:
On Mon, Mar 14, 2022 at 11:38:23AM -0400, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:
On Mon, Mar 14, 2022 at 11:23:17AM -0400, Tom Lane wrote:
I do find it odd that the proposed patch doesn't cause the *entire*
list to be skipped over. That seems like extra complexity and confusion
to no benefit.
That's a bit surprising for me, I haven't even thought that folks could
think this is an odd behaviour. As I've mentioned above, the original
idea was to give some clues about what was inside the collapsed array,
but if everyone finds it unnecessary I can of course change it.
But if what we're doing is skipping over an all-Consts list, then the
individual Consts would be elided from the pg_stat_statements entry
anyway, no? All that would remain is information about how many such
Consts there were, which is exactly the information you want to drop.
Hm, yes, you're right. I guess I was thinking about this more like about
shortening some text with ellipsis, but indeed no actual Consts will end
up in the result anyway. Thanks for clarification, will modify the
patch!
Here is another iteration. Now the patch doesn't leave any trailing
Consts in the normalized query, and contains more documentation. I hope
it's getting better.
Hi,
Here is the rebased version, with no other changes.
Attachments:
v8-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch text/x-diff; charset=us-ascii Download
From 5092a6914f1e55636bb8beed2251322cc0f1eec6 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v8] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr whose length is at least the specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/queryjumble.c | 105 ++++-
src/include/utils/queryjumble.h | 5 +-
8 files changed, 711 insertions(+), 18 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index ff0166fb9d..858cf49e66 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1102,4 +1102,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 049da9fe6d..daaa3f77cb 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2635,6 +2635,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2658,7 +2661,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2673,12 +2675,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The first merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appears */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index a01f183727..9ff50b52d3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e2d728e0c4..9bcee698fd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8154,6 +8154,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_threshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending on the size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and its length is at least <varname>const_merge_threshold</varname>,
+ then array elements will contribute nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index ecf6cd6bf3..a7b1d2863b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with more than <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af4a1c3068..b74ba720a4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3662,6 +3662,19 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index eeaa0b31fe..d68e644c98 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal number of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -341,6 +345,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jumble a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +478,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +667,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -904,11 +992,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant does not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -923,6 +1013,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 3c2d9beab2..b50cc42d4e 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +26,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -63,7 +66,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
Hello!
Unfortunately the patch needs another rebase due to the recent split of guc.c (0a20ff54f5e66158930d5328f89f087d4e9ab400)
I'm reviewing a patch on top of a previous commit and noticed a failed test:
# Failed test 'no parameters missing from postgresql.conf.sample'
# at t/003_check_guc.pl line 82.
# got: '1'
# expected: '0'
# Looks like you failed 1 test of 3.
t/003_check_guc.pl ..............
The new option has not been added to the postgresql.conf.sample
PS: I would also like to have such a feature. It's hard to increase pg_stat_statements.max or lose some entries just because some ORM sends requests with a different number of parameters.
regards, Sergei
On Fri, Sep 16, 2022 at 09:25:13PM +0300, Sergei Kornilov wrote:
Hello!
Unfortunately the patch needs another rebase due to the recent split of guc.c (0a20ff54f5e66158930d5328f89f087d4e9ab400)
I'm reviewing a patch on top of a previous commit and noticed a failed test:
# Failed test 'no parameters missing from postgresql.conf.sample'
# at t/003_check_guc.pl line 82.
# got: '1'
# expected: '0'
# Looks like you failed 1 test of 3.
t/003_check_guc.pl ..............
The new option has not been added to the postgresql.conf.sample
PS: I would also like to have such a feature. It's hard to increase pg_stat_statements.max or lose some entries just because some ORM sends requests with a different number of parameters.
Thanks! I'll post the rebased version soon.
On Sat, Sep 24, 2022 at 04:07:14PM +0200, Dmitry Dolgov wrote:
On Fri, Sep 16, 2022 at 09:25:13PM +0300, Sergei Kornilov wrote:
Hello!
Unfortunately the patch needs another rebase due to the recent split of guc.c (0a20ff54f5e66158930d5328f89f087d4e9ab400)
I'm reviewing a patch on top of a previous commit and noticed a failed test:
# Failed test 'no parameters missing from postgresql.conf.sample'
# at t/003_check_guc.pl line 82.
# got: '1'
# expected: '0'
# Looks like you failed 1 test of 3.
t/003_check_guc.pl ..............
The new option has not been added to the postgresql.conf.sample
PS: I would also like to have such a feature. It's hard to increase pg_stat_statements.max or lose some entries just because some ORM sends requests with a different number of parameters.
Thanks! I'll post the rebased version soon.
And here it is.
Attachments:
v9-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch text/plain; charset=us-ascii Download
From 327673290bfad8cebc00f9706a25d11034d2245d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v9] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/utils/misc/guc_tables.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/backend/utils/misc/queryjumble.c | 105 ++++-
src/include/utils/queryjumble.h | 5 +-
9 files changed, 712 insertions(+), 19 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index ff0166fb9d..858cf49e66 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1102,4 +1102,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ba868f0de9..5554581475 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2653,6 +2653,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2676,7 +2679,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2691,12 +2693,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index a01f183727..9ff50b52d3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a5cd4e44c7..ee388d8374 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8175,6 +8175,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_threshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending on the size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and its length is at least <varname>const_merge_threshold</varname>,
+ then array elements will contribute nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index ecf6cd6bf3..a7b1d2863b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with at least <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 87e625aa7a..bff8fcaaf9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3415,6 +3415,19 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 90bec0502c..806e74d61b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -626,7 +626,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#const_merge_threshold = 0
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a67487e5fe..063b4be725 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal numer of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -341,6 +345,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jubmle a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +478,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +667,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +920,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +941,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 3c2d9beab2..b50cc42d4e 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +26,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -63,7 +66,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.27.0
On Sun, 25 Sept 2022 at 05:29, Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Sat, Sep 24, 2022 at 04:07:14PM +0200, Dmitry Dolgov wrote:
On Fri, Sep 16, 2022 at 09:25:13PM +0300, Sergei Kornilov wrote:
Hello!
Unfortunately the patch needs another rebase due to the recent split of guc.c (0a20ff54f5e66158930d5328f89f087d4e9ab400)
I'm reviewing a patch on top of a previous commit and noticed a failed test:
# Failed test 'no parameters missing from postgresql.conf.sample'
# at t/003_check_guc.pl line 82.
# got: '1'
# expected: '0'
# Looks like you failed 1 test of 3.
t/003_check_guc.pl ..............
The new option has not been added to the postgresql.conf.sample.
PS: I would also like to have such a feature. It's hard to increase pg_stat_statements.max or lose some entries just because some ORM sends requests with a different number of parameters.
Thanks! I'll post the rebased version soon.
The patch does not apply on top of HEAD as in [1] (http://cfbot.cputube.org/patch_41_2837.log), please post a rebased patch:
=== Applying patches on top of PostgreSQL commit ID
456fa635a909ee36f73ca84d340521bd730f265f ===
=== applying patch
./v9-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
....
can't find file to patch at input line 746
Perhaps you used the wrong -p or --strip option?
The text leading up to this was:
--------------------------
|diff --git a/src/backend/utils/misc/queryjumble.c
b/src/backend/utils/misc/queryjumble.c
|index a67487e5fe..063b4be725 100644
|--- a/src/backend/utils/misc/queryjumble.c
|+++ b/src/backend/utils/misc/queryjumble.c
--------------------------
No file to patch. Skipping patch.
8 out of 8 hunks ignored
can't find file to patch at input line 913
Perhaps you used the wrong -p or --strip option?
The text leading up to this was:
--------------------------
|diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
|index 3c2d9beab2..b50cc42d4e 100644
|--- a/src/include/utils/queryjumble.h
|+++ b/src/include/utils/queryjumble.h
--------------------------
No file to patch. Skipping patch.
[1]: http://cfbot.cputube.org/patch_41_2837.log
Regards,
Vignesh
On Fri, Jan 27, 2023 at 08:15:29PM +0530, vignesh C wrote:
The patch does not apply on top of HEAD as in [1], please post a rebased patch:
Thanks. I think this one should do the trick.
Attachments:
v10-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) [Download]
From 3c51561ddaecdbc82842fae4fab74cc33526f17c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v10] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr whose length is at least the specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/queryjumblefuncs.c | 105 ++++-
src/backend/utils/misc/guc_tables.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/queryjumble.h | 5 +-
9 files changed, 712 insertions(+), 19 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 9ac5c87c3a..f18f34ae5b 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1141,4 +1141,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ad1fe44496..b26ae1f234 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2666,6 +2666,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2689,7 +2692,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2704,12 +2706,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index 8f5c866225..8f9d284ed3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -464,4 +464,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f985afc009..270107926c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_treshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending of size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and it's length is larger than <varname> const_merge_threshold </varname>,
+ then array elements will contribure nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index efc36da602..f7e2e9fe85 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with more than <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 16084842a3..1ea1cc66f8 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal numer of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -53,7 +56,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -120,7 +124,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -343,6 +347,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jubmle a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -392,7 +480,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -581,7 +669,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -835,11 +923,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -854,6 +944,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 4ac808ed22..663aded290 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3467,6 +3467,19 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..0594eb17b2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#const_merge_threshold = 0
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..4410e2cf61 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -61,7 +64,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
Em dom., 29 de jan. de 2023 às 09:24, Dmitry Dolgov <9erthalion6@gmail.com>
escreveu:
On Fri, Jan 27, 2023 at 08:15:29PM +0530, vignesh C wrote:
The patch does not apply on top of HEAD as in [1], please post a rebased patch:
Thanks. I think this one should do the trick.
There is a typo on DOC part
+ and it's length is larger than <varname> const_merge_threshold
</varname>,
+ then array elements will contribure nothing to the query
identifier.
+ Thus the query will get the same identifier no matter how many
constants
That "contribure" should be "contribute"
regards
Marcos
On Sun, Jan 29, 2023 at 09:56:02AM -0300, Marcos Pegoraro wrote:
Em dom., 29 de jan. de 2023 às 09:24, Dmitry Dolgov <9erthalion6@gmail.com>
escreveu:
On Fri, Jan 27, 2023 at 08:15:29PM +0530, vignesh C wrote:
The patch does not apply on top of HEAD as in [1], please post a rebased patch:
Thanks. I think this one should do the trick.
There is a typo on DOC part
+ and it's length is larger than <varname> const_merge_threshold </varname>,
+ then array elements will contribure nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
That "contribure" should be "contribute"
Indeed, thanks for noticing.
Attachments:
v11-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 1d980ef5f556c1684ea5c991965b2375bbdd139b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v11] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/queryjumblefuncs.c | 105 ++++-
src/backend/utils/misc/guc_tables.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/queryjumble.h | 5 +-
9 files changed, 712 insertions(+), 19 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 9ac5c87c3a..f18f34ae5b 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1141,4 +1141,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ad1fe44496..b26ae1f234 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2666,6 +2666,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2689,7 +2692,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2704,12 +2706,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index 8f5c866225..8f9d284ed3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -464,4 +464,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index f985afc009..e4306cdb89 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_threshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending on the size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and its length is at least <varname>const_merge_threshold</varname>,
+ then array elements will contribute nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index efc36da602..f7e2e9fe85 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with more than <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 16084842a3..1ea1cc66f8 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal number of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -53,7 +56,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -120,7 +124,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -343,6 +347,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jumble a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -392,7 +480,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -581,7 +669,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -835,11 +923,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -854,6 +944,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 4ac808ed22..663aded290 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3467,6 +3467,19 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..0594eb17b2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#const_merge_threshold = 0
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..4410e2cf61 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -61,7 +64,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
This appears to have massive conflicts. Would you please rebase?
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"¿Cómo puedes confiar en algo que pagas y que no ves,
y no confiar en algo que te dan y te lo muestran?" (Germán Poo)
On Thu, Feb 02, 2023 at 03:07:27PM +0100, Alvaro Herrera wrote:
This appears to have massive conflicts. Would you please rebase?
Sure, I was already mentally preparing myself to do so in the view of
recent changes in query jumbling. Will post soon.
On Thu, Feb 02, 2023 at 04:05:54PM +0100, Dmitry Dolgov wrote:
On Thu, Feb 02, 2023 at 03:07:27PM +0100, Alvaro Herrera wrote:
This appears to have massive conflicts. Would you please rebase?
Sure, I was already mentally preparing myself to do so in the view of
recent changes in query jumbling. Will post soon.
Here is the rebased version. To adapt to the latest changes, I've marked
ArrayExpr with custom_query_jumble to implement this functionality, but
tried to make the actual merge logic relatively independent. Otherwise,
everything is the same.
Attachments:
v12-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From e72f6b8990dace82667d46b3578062bee92af472 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v12] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 2 +-
src/backend/nodes/queryjumblefuncs.c | 102 ++++-
src/backend/utils/misc/guc_tables.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/primnodes.h | 2 +
src/include/nodes/queryjumble.h | 5 +-
11 files changed, 712 insertions(+), 20 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index fb9ccd920f..9acdb55c9a 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1142,4 +1142,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ad1fe44496..b26ae1f234 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2666,6 +2666,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2689,7 +2692,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2704,12 +2706,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index b82cddf16f..f561edbe83 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -465,4 +465,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d190be1925..501a132a4a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_threshold">
+ <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending on the size of the
+ array. If this parameter is nonzero, the array contains only constants,
+ and its length is at least <varname>const_merge_threshold</varname>,
+ then array elements will contribute nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ collapsed constants via <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index efc36da602..f7e2e9fe85 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+ queries contain an array with at least <varname>const_merge_threshold</varname>
+ constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 19ed29657c..86c4223ace 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1306,7 +1306,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 223d1bc826..06551f6a1b 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,12 +42,15 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal numer of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -109,7 +112,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -185,11 +188,13 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -205,15 +210,81 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list contains only constant expressions and its
+ * length is greater than or equal to const_merge_threshold, thus making it
+ * eligible for constants merging.
+ *
+ * Note that this function searches only for Const directly and do not tries to
+ * simplify expressions.
+ */
+static bool
+IsConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return false;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ return false;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -226,6 +297,23 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+ ArrayExpr *expr = (ArrayExpr *) node;
+ Const *first, *last;
+
+ if (IsConstList(expr->elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ JUMBLE_NODE(elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index b46e3b8c55..77ab38172b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3467,6 +3467,19 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..0594eb17b2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#const_merge_threshold = 0
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 6d740be5c0..8e568947bd 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1287,6 +1287,8 @@ typedef struct CaseTestExpr
*/
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Expr xpr;
/* type of expression result */
Oid array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..4410e2cf61 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -61,7 +64,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
On Sat, Feb 04, 2023 at 06:08:41PM +0100, Dmitry Dolgov wrote:
Here is the rebased version. To adapt to the latest changes, I've marked
ArrayExpr with custom_query_jumble to implement this functionality, but
tried to make the actual merge logic relatively independent. Otherwise,
everything is the same.
I was quickly looking at this patch, so these are rough impressions.
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
This part of the patch is a bit disturbing.. We have node attributes
to track if portions of a node should be ignored or have a location
marked, still this "merged" flag is used as an extension to track if a
location should be done or not. Is that a concept that had better be
controlled via a new node attribute?
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
Would it be better to split this set of tests into a new file? FWIW,
I have a patch in baking process that refactors a bit the whole,
before being able to extend it so as we have more coverage for
normalized utility queries, as of now the query strings stored by
pg_stat_statements don't reflect that even if the jumbling computation
marks the location of the Const nodes included in utility statements
(partition bounds, queries of COPY, etc.). I should be able to send
that tomorrow, and my guess that you could take advantage of that
even for this thread.
--
Michael
On Sun, Feb 05, 2023 at 10:30:25AM +0900, Michael Paquier wrote:
On Sat, Feb 04, 2023 at 06:08:41PM +0100, Dmitry Dolgov wrote:
Here is the rebased version. To adapt to the latest changes, I've marked
ArrayExpr with custom_query_jumble to implement this functionality, but
tried to make the actual merge logic relatively independent. Otherwise,
everything is the same.
I was quickly looking at this patch, so these are rough impressions.
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
This part of the patch is a bit disturbing.. We have node attributes
to track if portions of a node should be ignored or have a location
marked, still this "merged" flag is used as an extension to track if a
location should be done or not. Is that a concept that had better be
controlled via a new node attribute?
Good question. I need to think a bit more if it's possible to leverage
node attributes mechanism, but at the moment I'm still inclined to
extend LocationLen. The reason is that it doesn't exactly serve the
tracking purpose, i.e. whether to capture a location (I have to update
the code commentary), it helps differentiate cases when locations A and
D are obtained from merging A B C D instead of just being A and D.
I'm thinking about this in the following way: the core jumbling logic is
responsible for deriving locations based on the input expressions; in
the case of merging we produce less locations; pgss have to represent
the result only using locations and has to be able to differentiate
simple locations and locations after merging.
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
Would it be better to split this set of tests into a new file? FWIW,
I have a patch in baking process that refactors a bit the whole,
before being able to extend it so as we have more coverage for
normalized utility queries, as of now the query strings stored by
pg_stat_statements don't reflect that even if the jumbling computation
marks the location of the Const nodes included in utility statements
(partition bounds, queries of COPY, etc.). I should be able to send
that tomorrow, and my guess that you could take advantage of that
even for this thread.
Sure, I'll take a look how I can benefit from your work, thanks.
Dmitry Dolgov <9erthalion6@gmail.com> writes:
I'm thinking about this in the following way: the core jumbling logic is
responsible for deriving locations based on the input expressions; in
the case of merging we produce less locations; pgss have to represent
the result only using locations and has to be able to differentiate
simple locations and locations after merging.
Uh ... why? ISTM you're just going to elide all inside the IN,
so why do you need more than a start and stop position?
regards, tom lane
On Sun, Feb 05, 2023 at 11:02:32AM -0500, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:I'm thinking about this in the following way: the core jumbling logic is
responsible for deriving locations based on the input expressions; in
the case of merging we produce less locations; pgss have to represent
the result only using locations and has to be able to differentiate
simple locations and locations after merging.Uh ... why? ISTM you're just going to elide all inside the IN,
so why do you need more than a start and stop position?
Exactly, start and stop positions. But if there would be no information
that merging was applied, the following queries will look the same after
jumbling, right?
-- input query
SELECT * FROM test_merge WHERE id IN (1, 2);
-- jumbling result, two LocationLen, for values 1 and 2
SELECT * FROM test_merge WHERE id IN ($1, $2);
-- input query
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
-- jumbling result, two LocationLen after merging, for values 1 and 10
SELECT * FROM test_merge WHERE id IN (...);
-- without remembering about merging the result would be
SELECT * FROM test_merge WHERE id IN ($1, $2);
Hello!
Unfortunately, rebase is needed again due to recent changes in queryjumblefuncs ( 9ba37b2cb6a174b37fc51d0649ef73e56eae27fc )
It seems a little strange to me that with const_merge_threshold = 1, such a test case gives the same result as with const_merge_threshold = 2
select pg_stat_statements_reset();
set const_merge_threshold to 1;
select * from test where i in (1,2,3);
select * from test where i in (1,2);
select * from test where i in (1);
select query, calls from pg_stat_statements order by query;
query | calls
-------------------------------------+-------
select * from test where i in (...) | 2
select * from test where i in ($1) | 1
Probably const_merge_threshold = 1 should produce only "i in (...)"?
Is const_merge_threshold "the minimal length of an array" (i.e. greater than or equal) or "array .. length is larger than" (i.e. strictly greater)? I think the documentation is ambiguous in this regard.
I also noticed a typo in guc_tables.c: "Sets the minimal numer of constants in an array" -> number
regards, Sergei
On 07.02.23 21:14, Sergei Kornilov wrote:
It seems a little strange to me that with const_merge_threshold = 1, such a test case gives the same result as with const_merge_threshold = 2
What is the point of making this a numeric setting? Either you want to
merge all values or you don't want to merge any values.
On Tue, Feb 07, 2023 at 11:14:52PM +0300, Sergei Kornilov wrote:
Hello!
Thanks for reviewing.
Unfortunately, rebase is needed again due to recent changes in queryjumblefuncs ( 9ba37b2cb6a174b37fc51d0649ef73e56eae27fc )
Yep, my favourite game, rebaseball. Will post a new version soon, after
figuring out all the recent questions.
It seems a little strange to me that with const_merge_threshold = 1, such a test case gives the same result as with const_merge_threshold = 2
select pg_stat_statements_reset();
set const_merge_threshold to 1;
select * from test where i in (1,2,3);
select * from test where i in (1,2);
select * from test where i in (1);
select query, calls from pg_stat_statements order by query;
query | calls
-------------------------------------+-------
select * from test where i in (...) | 2
select * from test where i in ($1) | 1
Probably const_merge_threshold = 1 should produce only "i in (...)"?
Well, it's not intentional, probably I need to be more careful with
off-by-one. Although I agree to a certain extent with Peter questioning
the value of having numerical option here, let me think about this.
const_merge_threshold is "the minimal length of an array" (more or equal) or "array .. length is larger than" (not equals)? I think the documentation is ambiguous in this regard.
I also noticed a typo in guc_tables.c: "Sets the minimal numer of constants in an array" -> number
Yep, I'll rephrase the documentation.
On Thu, Feb 09, 2023 at 02:30:34PM +0100, Peter Eisentraut wrote:
On 07.02.23 21:14, Sergei Kornilov wrote:It seems a little strange to me that with const_merge_threshold = 1, such a test case gives the same result as with const_merge_threshold = 2
What is the point of making this a numeric setting? Either you want to
merge all values or you don't want to merge any values.
At least in theory the definition of "too many constants" is different
for different use cases and I see allowing to configure it as a way of
reducing the level of surprise here. The main scenario for a numerical
setting would be to distinguish between normal usage with just a handful
of constants (and the user expecting to see them represented in pgss)
and some sort of outliers with thousands of constants in a query (e.g.
as a defence mechanism for the infrastructure working with those
metrics). But I agree that it's not clear how much value is in that.
Not having strong opinion about this I would be fine changing it to a
boolean option (with an actual limit hidden internally) if everyone
agrees it fits better.
On 2023-Feb-09, Dmitry Dolgov wrote:
On Thu, Feb 09, 2023 at 02:30:34PM +0100, Peter Eisentraut wrote:
What is the point of making this a numeric setting? Either you want
to merge all values or you don't want to merge any values.At least in theory the definition of "too many constants" is different
for different use cases and I see allowing to configure it as a way of
reducing the level of surprise here.
I was thinking about this a few days ago and I agree that we don't
necessarily want to make it just a boolean thing; we may want to make it
more complex. One trivial idea is to make it group entries in powers of
10: for 0-9 elements, you get one entry, and 10-99 you get a different
one, and so on:
# group everything in a single bucket
const_merge_threshold = true / yes / on
# group 0-9, 10-99, 100-999, 1000-9999
const_merge_threshold = powers
Ideally the value would be represented somehow in the query text. For
example
query | calls
----------------------------------------------------------+-------
select * from test where i in ({... 0-9 entries ...}) | 2
select * from test where i in ({... 10-99 entries ...}) | 1
What do you think? The jumble would have to know how to reduce all
values within each power-of-ten group to one specific value, but I don't
think that should be particularly difficult.
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"Find a bug in a program, and fix it, and the program will work today.
Show the program how to find and fix a bug, and the program
will work forever" (Oliver Silfridge)
On Thu, Feb 09, 2023 at 06:26:51PM +0100, Alvaro Herrera wrote:
On 2023-Feb-09, Dmitry Dolgov wrote:On Thu, Feb 09, 2023 at 02:30:34PM +0100, Peter Eisentraut wrote:
What is the point of making this a numeric setting? Either you want
to merge all values or you don't want to merge any values.At least in theory the definition of "too many constants" is different
for different use cases and I see allowing to configure it as a way of
reducing the level of surprise here.I was thinking about this a few days ago and I agree that we don't
necessarily want to make it just a boolean thing; we may want to make it
more complex. One trivial idea is to make it group entries in powers of
10: for 0-9 elements, you get one entry, and 10-99 you get a different
one, and so on:# group everything in a single bucket
const_merge_threshold = true / yes / on# group 0-9, 10-99, 100-999, 1000-9999
const_merge_threshold = powers
Ideally the value would be represented somehow in the query text. For
examplequery | calls
----------------------------------------------------------+-------
select * from test where i in ({... 0-9 entries ...}) | 2
select * from test where i in ({... 10-99 entries ...}) | 1What do you think? The jumble would have to know how to reduce all
values within each power-of-ten group to one specific value, but I don't
think that should be particularly difficult.
Yeah, it sounds appealing and conveniently addresses the question of
losing the information about how many constants originally were there.
Not sure if the example above would be the most natural way to represent
it in the query text, but otherwise I'm going to try implementing this.
Stay tuned.
Hi,
On 2/9/23 16:02, Dmitry Dolgov wrote:
Unfortunately, rebase is needed again due to recent changes in queryjumblefuncs ( 9ba37b2cb6a174b37fc51d0649ef73e56eae27fc )
I reviewed the last patch applied to some commit from Feb. 4th.
It seems a little strange to me that with const_merge_threshold = 1, such a test case gives the same result as with const_merge_threshold = 2
select pg_stat_statements_reset();
set const_merge_threshold to 1;
select * from test where i in (1,2,3);
select * from test where i in (1,2);
select * from test where i in (1);
select query, calls from pg_stat_statements order by query;query | calls
-------------------------------------+-------
select * from test where i in (...) | 2
select * from test where i in ($1) | 1Probably const_merge_threshold = 1 should produce only "i in (...)"?
Well, it's not intentional, probably I need to be more careful with
off-by-one. Although I agree to a certain extent with Peter questioning
Please add tests for all the corner cases. At least for (1) IN only
contains a single element and (2) const_merge_threshold = 1.
Beyond that:
- There's a comment about find_const_walker(). I cannot find that
function anywhere. What am I missing?
- What about renaming IsConstList() to IsMergableConstList().
- Don't you intend to use the NUMERIC data column in SELECT * FROM
test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)?
Otherwise, the test is identical to previous test cases and you're not
checking for what happens with NUMERICs which are wrapped in FuncExpr
because of the implicit coercion.
- Don't we want to extend IsConstList() to allow a list of all
implicitly coerced constants? It's inconsistent that otherwise e.g.
NUMERICs don't work.
- Typo in /* The firsts merged constant */ (first not firsts)
- Prepared statements are not supported as they contain INs with Param
instead of Const nodes. While less likely, I've seen applications that
use prepared statements in conjunction with queries generated through a
UI which ended up with tons of prepared queries with different number of
elements in the IN clause. Not necessarily something that must go into
this patch but maybe worth thinking about.
- The setting name const_merge_threshold is not very telling without
knowing the context. While being a little longer, what about
jumble_const_merge_threshold or queryid_const_merge_threshold or similar?
- Why do we actually only want to merge constants? Why don't we ignore
the type of element in the IN and merge whatever is there? Is this
because the original jumbling logic as of now only has support for
constants?
- Ideally we would even remove duplicates. That would even improve
cardinality estimation but I guess we don't want to spend the cycles on
doing so in the planner?
- Why did you change palloc() to palloc0() for clocations array? The
length is initialized to 0 and FWICS RecordConstLocation() initializes
all members. Seems to me like we don't have to spend these cycles.
- Can't the logic at the end of IsConstList() be simplified to:
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
return false;
// All elements are of type Const
*firstConst = linitial_node(Const, elements);
*lastConst = llast_node(Const, elements);
return true;
--
David Geier
(ServiceNow)
On Sat, Feb 11, 2023 at 11:03:36AM +0100, David Geier wrote:
Hi,On 2/9/23 16:02, Dmitry Dolgov wrote:
Unfortunately, rebase is needed again due to recent changes in queryjumblefuncs ( 9ba37b2cb6a174b37fc51d0649ef73e56eae27fc )
I reviewed the last patch applied to some commit from Feb. 4th.
Thanks for looking. Few quick answers about high-level questions below,
the rest I'll incorporate in the new version.
- There's a comment about find_const_walker(). I cannot find that function
anywhere. What am I missing?[...]
- Don't you intend to use the NUMERIC data column in SELECT * FROM
test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)? Otherwise,
the test is identical to previous test cases and you're not checking for
what happens with NUMERICs which are wrapped in FuncExpr because of the
implicit coercion.- Don't we want to extend IsConstList() to allow a list of all implicitly
coerced constants? It's inconsistent that otherwise e.g. NUMERICs don't
work.[...]
- Prepared statements are not supported as they contain INs with Param
instead of Const nodes. While less likely, I've seen applications that use
prepared statements in conjunction with queries generated through a UI which
ended up with tons of prepared queries with different number of elements in
the IN clause. Not necessarily something that must go into this patch but
maybe worth thinking about.
The original version of the patch was doing all of this, i.e. handling
numerics, Param nodes, RTE_VALUES. The commentary about
find_const_walker in tests is referring to a part of that, that was
dealing with evaluation of expression to see if it could be reduced to a
constant.
Unfortunately there was a significant push back from reviewers because
of those features. That's why I've reduced the patch to its minimally
useful version, having in mind re-implementing them as follow-up patches
in the future. This is the reason as well why I left tests covering all
this missing functionality -- as breadcrumbs to already discovered
cases, important for the future extensions.
- Why do we actually only want to merge constants? Why don't we ignore the
type of element in the IN and merge whatever is there? Is this because the
original jumbling logic as of now only has support for constants?- Ideally we would even remove duplicates. That would even improve
cardinality estimation but I guess we don't want to spend the cycles on
doing so in the planner?
I believe these points are beyond the patch goals, as it's less clear
(at least to me) if it's safe or desirable to do so.
On Sat, Feb 11, 2023 at 11:47:07AM +0100, Dmitry Dolgov wrote:
The original version of the patch was doing all of this, i.e. handling
numerics, Param nodes, RTE_VALUES. The commentary about
find_const_walker in tests is referring to a part of that, that was
dealing with evaluation of expression to see if it could be reduced to a
constant.Unfortunately there was a significant push back from reviewers because
of those features. That's why I've reduced the patch to it's minimally
useful version, having in mind re-implementing them as follow-up patches
in the future. This is the reason as well why I left tests covering all
this missing functionality -- as breadcrumbs to already discovered
cases, important for the future extensions.
I'd like to elaborate on this a bit and remind about the origins of the
patch, as it's lost somewhere in the beginning of the thread. The idea
is not pulled out of thin air, everything is coming from our attempts to
improve one particular monitoring infrastructure in a real commercial
setting. Every covered use case and test in the original proposal was a
result of field trials, when some application-side library or ORM was
responsible for gigabytes of data in pgss, choking the monitoring agent.
Hi,
On 2/11/23 13:08, Dmitry Dolgov wrote:
On Sat, Feb 11, 2023 at 11:47:07AM +0100, Dmitry Dolgov wrote:
The original version of the patch was doing all of this, i.e. handling
numerics, Param nodes, RTE_VALUES. The commentary about
find_const_walker in tests is referring to a part of that, that was
dealing with evaluation of expression to see if it could be reduced to a
constant.Unfortunately there was a significant push back from reviewers because
of those features. That's why I've reduced the patch to it's minimally
useful version, having in mind re-implementing them as follow-up patches
in the future. This is the reason as well why I left tests covering all
this missing functionality -- as breadcrumbs to already discovered
cases, important for the future extensions.I'd like to elaborate on this a bit and remind about the origins of the
patch, as it's lost somewhere in the beginning of the thread. The idea
is not pulled out of thin air, everything is coming from our attempts to
improve one particular monitoring infrastructure in a real commercial
setting. Every covered use case and test in the original proposal was a
result of field trials, when some application-side library or ORM was
responsible for gigabytes of data in pgss, chocking the monitoring agent.
Thanks for the clarifications. I didn't mean to contend the usefulness
of the patch and I wasn't aware that you already jumped through the
hoops of handling Param, etc. Seems like supporting only constants is a
good starting point. The only thing that is likely confusing for users
is that NUMERICs (and potentially constants of other types) are
unsupported. Wouldn't it be fairly simple to support them via something
like the following?
is_const(element) || (is_coercion(element) && is_const(element->child))
--
David Geier
(ServiceNow)
On Wed, Feb 15, 2023 at 08:51:56AM +0100, David Geier wrote:
Hi,On 2/11/23 13:08, Dmitry Dolgov wrote:
On Sat, Feb 11, 2023 at 11:47:07AM +0100, Dmitry Dolgov wrote:
The original version of the patch was doing all of this, i.e. handling
numerics, Param nodes, RTE_VALUES. The commentary about
find_const_walker in tests is referring to a part of that, that was
dealing with evaluation of expression to see if it could be reduced to a
constant.Unfortunately there was a significant push back from reviewers because
of those features. That's why I've reduced the patch to it's minimally
useful version, having in mind re-implementing them as follow-up patches
in the future. This is the reason as well why I left tests covering all
this missing functionality -- as breadcrumbs to already discovered
cases, important for the future extensions.I'd like to elaborate on this a bit and remind about the origins of the
patch, as it's lost somewhere in the beginning of the thread. The idea
is not pulled out of thin air, everything is coming from our attempts to
improve one particular monitoring infrastructure in a real commercial
setting. Every covered use case and test in the original proposal was a
result of field trials, when some application-side library or ORM was
responsible for gigabytes of data in pgss, chocking the monitoring agent.Thanks for the clarifications. I didn't mean to contend the usefulness of
the patch and I wasn't aware that you already jumped through the loops of
handling Param, etc.
No worries, I just wanted to emphasize that we've already collected
quite some number of use cases.
Seems like supporting only constants is a good starting
point. The only thing that is likely confusing for users is that NUMERICs
(and potentially constants of other types) are unsupported. Wouldn't it be
fairly simple to support them via something like the following?
    is_const(element) || (is_coercion(element) && is_const(element->child))
It definitely makes sense to implement that, although I don't think it's
going to be acceptable to do that via directly listing conditions an
element has to satisfy. It probably has to be more flexible, since we
would like to extend it in the future. My plan is to address this in a
follow-up patch, when the main mechanism is approved. Would you agree
with this approach?
On Thu, Feb 09, 2023 at 08:43:29PM +0100, Dmitry Dolgov wrote:
On Thu, Feb 09, 2023 at 06:26:51PM +0100, Alvaro Herrera wrote:
On 2023-Feb-09, Dmitry Dolgov wrote:On Thu, Feb 09, 2023 at 02:30:34PM +0100, Peter Eisentraut wrote:
What is the point of making this a numeric setting? Either you want
to merge all values or you don't want to merge any values.At least in theory the definition of "too many constants" is different
for different use cases and I see allowing to configure it as a way of
reducing the level of surprise here.I was thinking about this a few days ago and I agree that we don't
necessarily want to make it just a boolean thing; we may want to make it
more complex. One trivial idea is to make it group entries in powers of
10: for 0-9 elements, you get one entry, and 10-99 you get a different
one, and so on:# group everything in a single bucket
const_merge_threshold = true / yes / on# group 0-9, 10-99, 100-999, 1000-9999
const_merge_threshold = powers
Ideally the value would be represented somehow in the query text. For
examplequery | calls
----------------------------------------------------------+-------
select * from test where i in ({... 0-9 entries ...}) | 2
select * from test where i in ({... 10-99 entries ...}) | 1What do you think? The jumble would have to know how to reduce all
values within each power-of-ten group to one specific value, but I don't
think that should be particularly difficult.Yeah, it sounds appealing and conveniently addresses the question of
losing the information about how many constants originally were there.
Not sure if the example above would be the most natural way to represent
it in the query text, but otherwise I'm going to try implementing this.
Stay tuned.
It took me couple of evenings, here is what I've got:
* The representation is not that far away from your proposal, I've
settled on:
SELECT * FROM test_merge WHERE id IN (... [10-99 entries])
* To not reinvent the wheel, I've reused decimalLength function from
numutils, hence one more patch to make it available to reuse.
* This approach resolves my concerns about letting people tune
the behaviour of merging, as now it's possible to distinguish between
calls with different number of constants up to the power of 10. So
I've decided to simplify the configuration and make the guc boolean to
turn it off or on.
* To separate queries with constants falling into different ranges
(10-99, 100-999, etc), the order of magnitude is added into the jumble
hash.
* I've incorporated feedback from Sergei and David, as well as tried to
make comments and documentation more clear.
Any feedback is welcomed, thanks!
Attachments:
v13-0001-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From dbb4eab9f3efbcee2326278be6f70ff52685b2b0 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v13 1/2] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 471fbb7ee6..df7418cce7 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 0000000000..876e64f2df
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.32.0
v13-0002-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 717595218d9668b5ea5bd5d88052959786148d7d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v13 2/2] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the number of
elements (up to power of 10) to the jumble hash. Allow to enable this
behavior via the new GUC query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 227 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 46 +++-
.../sql/pg_stat_statements.sql | 68 ++++++
doc/src/sgml/config.sgml | 29 +++
doc/src/sgml/pgstatstatements.sgml | 27 ++-
src/backend/nodes/gen_node_support.pl | 2 +-
src/backend/nodes/queryjumblefuncs.c | 111 ++++++++-
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/primnodes.h | 2 +
src/include/nodes/queryjumble.h | 12 +-
11 files changed, 517 insertions(+), 19 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 8c0b2235e8..f32e8cc5f0 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1156,4 +1156,231 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Const merging functionality
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(5 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(6 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- No unmerged constants
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) and data = $10 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(4 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test constants evaluation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET query_id_const_merge;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ad1fe44496..ece56e99b2 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2666,6 +2666,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+ int magnitude; /* Order of magnitute for number merged constants */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2689,7 +2693,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2704,12 +2707,43 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index cebde7392b..dc4b3e355b 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -473,4 +473,72 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Const merging functionality
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No unmerged constants
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET query_id_const_merge;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ecd9aa73ef..5b15863ac7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,35 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-const-merge" xreflabel="query_id_const_merge">
+ <term><varname>query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how arrays of constants (e.g. for an "IN" clause) contribute
+ to the query identifier computation. Normally every element of an array
+ contributes to a query identifier, which means effectively the same
+ query could get multiple different identifiers, depending on the size of the
+ array.
+
+ If this parameter is on, two queries with an array will get the same
+ query identifier if the only difference between them is the number of
+ constants, both numbers are of the same order of magnitude and greater than
+ or equal to 10 (so the order of magnitude is greater than 1; it is not worth
+ the effort otherwise).
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/> extension. The default value is off.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ queries in form <literal>'(... [10-99 entries])'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index efc36da602..84dedb870e 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-query-id-const-merge"/> is enabled and the
+ queries contain an array of constants of similar size:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index ecbcadb8bf..93aed18c2e 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1308,7 +1308,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d7fd72d70f..b2508cb0bd 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,17 +37,26 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
+#define QUERY_ID_CONST_MERGE_THRESHOLD 10 /* when to start merging constants,
+ * purely to avoid unnecessary work */
+
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
@@ -186,11 +195,18 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. In
+ * this case magnitude value describes order of magnitude for the number of
+ * elements in the series (i.e. how many digits it has), to represent the fact
+ * of merging later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +222,73 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify whether the provided list could be merged down, which means it
+ * contains only constant expressions and its length is greater than or equal
+ * to QUERY_ID_CONST_MERGE_THRESHOLD.
+ *
+ * Return value is an order of magnitude for size of the list (to use for
+ * representation purposes later on) if merging is possible, otherwise zero.
+ *
+ * Note that this function searches only for Const directly and does not try to
+ * simplify expressions.
+ */
+static int
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return 0;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return 0;
+ }
+
+ if (elements->length < QUERY_ID_CONST_MERGE_THRESHOLD)
+ {
+ /* It is not worth it to consider small lists for merging */
+ return 0;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return 0;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return decimalLength32(elements->length);
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return 0;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +301,31 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+ ArrayExpr *expr = (ArrayExpr *) node;
+ Const *first, *last;
+ int magnitude = IsMergeableConstList(expr->elements, &first, &last);
+
+ if (magnitude)
+ {
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitute behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
+ }
+ else
+ {
+ JUMBLE_NODE(elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 43b9d92660..11147e72fc 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1972,6 +1972,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_const_merge", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets whether an array of constants will contribute to"
+ " query identified computation."),
+ },
+ &query_id_const_merge,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..57b83b296d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#query_id_const_merge = off
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 1be1642d92..d79b663825 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1291,6 +1291,8 @@ typedef struct CaseTestExpr
*/
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Expr xpr;
/* type of expression result */
Oid array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..001694aee9 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,15 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * The constant may represent the beginning or the end of a merged
+ * constants interval. In this case the magnitude value contains how many
+ * constants were merged away (to a power of 10), in other words order of
+ * magnitude for number of merged constants. Otherwise the value is 0,
+ * indicating that no merging is involved.
+ */
+ int magnitude;
} LocationLen;
/*
@@ -61,7 +71,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
Hi,
Seems like supporting only constants is a good starting
point. The only thing that is likely confusing for users is that NUMERICs
(and potentially constants of other types) are unsupported. Wouldn't it be
fairly simple to support them via something like the following?
is_const(element) || (is_coercion(element) && is_const(element->child))
It definitely makes sense to implement that, although I don't think it's
going to be acceptable to do that via directly listing conditions an
element has to satisfy. It probably has to be more flexible, since we
would like to extend it in the future. My plan is to address this in a
follow-up patch, when the main mechanism is approved. Would you agree
with this approach?
I still think it's counterintuitive and I'm pretty sure people would
even report this as a bug because not knowing about the difference in
internal representation you would expect NUMERICs to work the same way
other constants work. If anything we would have to document it.
Can't we do something pragmatic and have something like
IsMergableInElement() which for now only supports constants and later
can be extended? Or what exactly do you mean by "more flexible"?
--
David Geier
(ServiceNow)
On Thu, Feb 23, 2023 at 09:48:35AM +0100, David Geier wrote:
Hi,
Seems like supporting only constants is a good starting
point. The only thing that is likely confusing for users is that NUMERICs
(and potentially constants of other types) are unsupported. Wouldn't it be
fairly simple to support them via something like the following?
is_const(element) || (is_coercion(element) && is_const(element->child))
It definitely makes sense to implement that, although I don't think it's
going to be acceptable to do that via directly listing conditions an
element has to satisfy. It probably has to be more flexible, since we
would like to extend it in the future. My plan is to address this in a
follow-up patch, when the main mechanism is approved. Would you agree
with this approach?
I still think it's counterintuitive and I'm pretty sure people would even
report this as a bug because not knowing about the difference in internal
representation you would expect NUMERICs to work the same way other
constants work. If anything we would have to document it.
Can't we do something pragmatic and have something like
IsMergableInElement() which for now only supports constants and later can be
extended? Or what exactly do you mean by "more flexible"?
Here is how I see it (pls correct me if I'm wrong at any point). To
support numerics as presented in the tests from this patch, we have to
deal with FuncExpr (the function converting a value into a numeric).
Having in mind only numerics, we would need to filter out any other
FuncExpr (which already sounds dubious). Then we need to validate for
example that the function is immutable and has constant arguments,
which is already implemented in evaluate_function and is a part of
eval_const_expressions. There is nothing special about numerics at this
point, and this approach leads us back to eval_const_expressions to a
certain degree. Do you see any other way?
I'm thinking about Michael's idea in this context, and want to see if it
would be possible to make the mechanism more flexible using some node
attributes. But I see it only as a follow-up step, not a prerequisite.
So I was seeing that this patch needs a rebase according to cfbot.
However it looks like the review feedback you're looking for is more
of design questions. What jumbling is best to include in the feature
set and which is best to add in later patches. It sounds like you've
gotten conflicting feedback from initial reviews.
It does sound like the patch is pretty mature and you're actively
responding to feedback so if you got more authoritative feedback it
might even be committable now. It's already been two years of being
rolled forward so it would be a shame to keep rolling it forward.
Or is there some fatal problem that you're trying to work around and
still haven't found the magic combination that convinces any
committers this is something we want? In which case perhaps we set
this patch returned? I don't get that impression myself though.
On Tue, Mar 14, 2023 at 02:14:17PM -0400, Gregory Stark (as CFM) wrote:
So I was seeing that this patch needs a rebase according to cfbot.
Yeah, folks are getting up to speed with pgss improvements recently.
Thanks for letting me know.
However it looks like the review feedback you're looking for is more
of design questions. What jumbling is best to include in the feature
set and which is best to add in later patches. It sounds like you've
gotten conflicting feedback from initial reviews.
It does sound like the patch is pretty mature and you're actively
responding to feedback so if you got more authoritative feedback it
might even be committable now. It's already been two years of being
rolled forward so it would be a shame to keep rolling it forward.
You got it about right. There is a balance to strike between
implementation, that would cover more useful cases, but has more
dependencies (something like possibility of having multiple query id),
and more minimalistic implementation that would actually be acceptable
in the way it is now. But I haven't heard back from David about it, so I
assume everybody is fine with the minimalistic approach.
Or is there some fatal problem that you're trying to work around and
still haven't found the magic combination that convinces any
committers this is something we want? In which case perhaps we set
this patch returned? I don't get that impression myself though.
Nothing like this on my side, although I'm not good at conjuring
committing powers of the nature.
On Tue, Mar 14, 2023 at 08:04:32PM +0100, Dmitry Dolgov wrote:
On Tue, Mar 14, 2023 at 02:14:17PM -0400, Gregory Stark (as CFM) wrote:
So I was seeing that this patch needs a rebase according to cfbot.
Yeah, folks are getting up to speed with pgss improvements recently.
Thanks for letting me know.
Following recent refactoring of pg_stat_statements tests, I've created a
new one for merging functionality in the patch. This should solve any
conflicts.
Attachments:
v14-0001-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 4f8ad43c1f0547913e5da8fa97fe08bccf06c91d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v14 1/2] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 471fbb7ee6..df7418cce7 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 0000000000..876e64f2df
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.32.0
v14-0002-Prevent-jumbling-of-every-element-in-ArrayExpr.patch text/x-diff; charset=us-ascii Download
From 27e4f29ac1052163cf9bbcf0a0c96a1b227d7d6c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 19 Mar 2023 13:01:07 +0100
Subject: [PATCH v14 2/2] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the number of
elements (up to power of 10) to the jumble hash. Allow to enable this
behavior via the new GUC query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 221 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 46 +++-
contrib/pg_stat_statements/sql/merging.sql | 69 ++++++
doc/src/sgml/config.sgml | 29 +++
doc/src/sgml/pgstatstatements.sgml | 27 ++-
src/backend/nodes/gen_node_support.pl | 2 +-
src/backend/nodes/queryjumblefuncs.c | 111 ++++++++-
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/primnodes.h | 2 +
src/include/nodes/queryjumble.h | 12 +-
13 files changed, 514 insertions(+), 20 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 5578a9dd4e..2e30324334 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -18,7 +18,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal cleanup oldextversions
+ user_activity wal cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..069cd4709d
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,221 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(6 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- No unmerged constants
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) and data = $10 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Test constants evaluation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 3e3062ada9..3c43c9f283 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -50,6 +50,7 @@ tests += {
'wal',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 5285c3f7fa..70920a7853 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2668,6 +2668,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+ int magnitude; /* Order of magnitute for number merged constants */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2691,7 +2695,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2706,12 +2709,43 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..0b431bf9a2
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,69 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No unmerged constants
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e5c41cc6c6..25e4897648 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,35 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-const-merge" xreflabel="query_id_const_merge">
+ <term><varname>query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how arrays of constants (e.g. for an "IN" clause) contribute
+ to the query identifier computation. Normally every element of an array
+ contributes to a query identifier, which means effectively the same
+ query could get multiple different identifiers, depending of size of the
+ array.
+
+ If this parameter is on, two queries with an array will get the same
+ query identifier if the only difference between them is the number of
+ constants, both numbers is of the same order of magnitude and greater or
+ equal 10 (so the order of magnitude is greather than 1, it is not worth
+ the efforts otherwise).
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/> extension. The default value is off.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ queries in form <literal>'(... [10-99 entries])'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index b1214ee645..4ec8ba2c4f 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -529,10 +529,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single <structname>pg_stat_statements</structname>
+ entry is when <xref linkend="guc-query-id-const-merge"/> is enabled and the
+ queries contain an array of constants of similar size:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index ecbcadb8bf..93aed18c2e 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1308,7 +1308,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d7fd72d70f..b2508cb0bd 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,17 +37,26 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
+#define QUERY_ID_CONST_MERGE_THRESHOLD 10 /* when to start merging constants,
+ * purely to avoid unnecessary work */
+
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
@@ -186,11 +195,18 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. In
+ * this case magnitute value describes order of magnitute for the number of
+ * elements in the series (i.e. how many digits it has), to represent the fact
+ * of merging later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +222,73 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list contains could be merged down, which means it
+ * contains only constant expressions and its length is greater than or equal
+ * to CONST_MERGE_THRESHOLD.
+ *
+ * Return value is an order of magnitude for size of the list (to use for
+ * representation purposes later on) if merging is possible, otherwise zero.
+ *
+ * Note that this function searches only for Const directly and do not tries to
+ * simplify expressions.
+ */
+static int
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return 0;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return 0;
+ }
+
+ if (elements->length < QUERY_ID_CONST_MERGE_THRESHOLD)
+ {
+ /* It is not worth it to consider small lists for merging */
+ return 0;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return 0;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return decimalLength32(elements->length);
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return 0;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +301,31 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+ ArrayExpr *expr = (ArrayExpr *) node;
+ Const *first, *last;
+ int magnitude = IsMergeableConstList(expr->elements, &first, &last);
+
+ if (magnitude)
+ {
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitute behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
+ }
+ else
+ {
+ JUMBLE_NODE(elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 1c0583fe26..64bbd737e7 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1973,6 +1973,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_const_merge", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets whether an array of constants will contribute to"
+ " query identified computation."),
+ },
+ &query_id_const_merge,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..57b83b296d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#query_id_const_merge = off
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 8fb5b4b919..f5ef01ce4d 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1294,6 +1294,8 @@ typedef struct CaseTestExpr
*/
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Expr xpr;
/* type of expression result */
Oid array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..001694aee9 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,15 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * The constant may represent the beginning or the end of a merged
+ * constants interval. In this case the magnitude value contains how many
+ * constants were merged away (to a power of 10), in other words order of
+ * manitude for number of merged constants. Otherwise the value is 0,
+ * indicating that no merging is involved.
+ */
+ int magnitude;
} LocationLen;
/*
@@ -61,7 +71,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.32.0
On Sun, Mar 19, 2023 at 01:27:34PM +0100, Dmitry Dolgov wrote:
+ If this parameter is on, two queries with an array will get the same + query identifier if the only difference between them is the number of + constants, both numbers is of the same order of magnitude and greater or + equal 10 (so the order of magnitude is greather than 1, it is not worth + the efforts otherwise).
IMHO this adds way too much complexity to something that most users would
expect to be an on/off switch. If I understand Álvaro's suggestion [0]/messages/by-id/20230209172651.cfgrebpyyr72h7fv@alvherre.pgsql
correctly, he's saying that in addition to allowing "on" and "off", it
might be worth allowing something like "powers" to yield roughly the
behavior described above. I don't think he's suggesting that this "powers"
behavior should be the only available option. Also, it seems
counterintuitive that queries with fewer than 10 constants are not merged.
In the interest of moving this patch forward, I would suggest making it a
simple on/off switch in 0002 and moving the "powers" functionality to a new
0003 patch. I think separating out the core part of this feature might
help reviewers. As you can see, I got distracted by the complicated
threshold logic and ended up focusing my first round of review there.
[0]: /messages/by-id/20230209172651.cfgrebpyyr72h7fv@alvherre.pgsql
--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
On Mon, Jul 03, 2023 at 09:46:11PM -0700, Nathan Bossart wrote:
Thanks for reviewing.
On Sun, Mar 19, 2023 at 01:27:34PM +0100, Dmitry Dolgov wrote:
+ If this parameter is on, two queries with an array will get the same + query identifier if the only difference between them is the number of + constants, both numbers is of the same order of magnitude and greater or + equal 10 (so the order of magnitude is greather than 1, it is not worth + the efforts otherwise).
IMHO this adds way too much complexity to something that most users would
expect to be an on/off switch.
This documentation is exclusively to be precise about how does it work.
Users don't have to worry about all this, and pretty much turn it
on/off, as you've described. I agree though, I could probably write this
text a bit differently.
If I understand Álvaro's suggestion [0] correctly, he's saying that in
addition to allowing "on" and "off", it might be worth allowing
something like "powers" to yield roughly the behavior described above.
I don't think he's suggesting that this "powers" behavior should be
the only available option.
Independently of what Álvaro was suggesting, I find the "powers"
approach more suitable, because it answers my own concerns about the
previous implementation. Having "on"/"off" values means we would have to
scratch heads coming up with a one-size-fit-all default value, or to
introduce another option for the actual cut-off threshold. I would like
to avoid both of those options, that's why I went with "powers" only.
Also, it seems counterintuitive that queries with fewer than 10
constants are not merged.
Why? What would be your intuition using this feature?
In the interest of moving this patch forward, I would suggest making it a
simple on/off switch in 0002 and moving the "powers" functionality to a new
0003 patch. I think separating out the core part of this feature might
help reviewers. As you can see, I got distracted by the complicated
threshold logic and ended up focusing my first round of review there.
I would disagree. As I've described above, to me "powers" seems to be a
better fit, and the complicated logic is in fact reusing one already
existing function. Do those arguments sound convincing to you?
The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: tested, passed
I've tested the patch on 17devel/master and it is my feeling - especially given the proliferation of ORMs - that we need such a thing in pgss. The thread has already taken almost 3 years, so it would be a pity to waste so much of your development time. Cfbot is green, and the patch works very well for me. IMVHO the commitfest status should even be set to ready-for-committer.
Given the:
SET query_id_const_merge = on;
SELECT pg_stat_statements_reset();
SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11);
SELECT * FROM test WHERE a IN (1, 2, 3);
SELECT * FROM test WHERE a = ALL('{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}');
SELECT * FROM test WHERE a = ANY (ARRAY[11,10,9,8,7,6,5,4,3,2,1]);
The patch results in:
q | calls
-----------------------------------------------------+-------
SELECT * FROM test WHERE a = ALL($1) | 1
SELECT pg_stat_statements_reset() | 1
SELECT * FROM test WHERE a IN ($1, $2, $3) | 1
SELECT * FROM test WHERE a IN (... [10-99 entries]) | 2
Of course it's a pity it doesn't collapse the ones below:
SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11)) AS t (num);
INSERT INTO dummy VALUES(1, 'text 1'),(2, 'text 2'),(3, 'text 3'),(4, 'text 3'),(5, 'text 3'),(6, 'text 3'),(7, 'text 3'),(8, 'text 3'),(9, 'text 3'),(10, 'text 3') ON CONFLICT (id) DO NOTHING;
PREPARE s3(int[], int[], int[], int[], int[], int[], int[], int[], int[], int[], int[]) AS SELECT * FROM test WHERE
a = ANY ($1::int[]) OR
a = ANY ($2::int[]) OR
[..]
a = ANY ($11::int[]) ;
but given the convoluted thread history, it's understandable and as you stated - maybe in future.
There's one additional benefit to this patch: the pg_hint_plan extension seems to borrow pgss's generate_normalized_query(). So if that's changed in next major release, the pg_hint_plan hint table (transparent plan rewrite using table) will automatically benefit from generalization of the query string here (imagine fixing plans for ORM that generate N {1,1024} number of IN() array elements; today that would be N number of entries in the "hint_plan.hints" table).
The new status of this patch is: Needs review
I've also tried the patch and I see the same results as Jakub, which
make sense to me. I did have issues getting it to apply, though: `git
am` complains about a conflict, though patch itself was able to apply
it.
Hi, this is my first email to the pgsql hackers.
I came across this email thread while looking at
https://github.com/rails/rails/pull/49388, a pull request for Ruby on Rails
(one of the popular web application frameworks) that replaces every query
`in` clause with `any` to reduce similar entries in `pg_stat_statements`.
I want this to be solved on the PostgreSQL side, mainly because I want
to avoid replacing
every in clause with any to reduce similar entries in pg_stat_statements.
It would be nice to have this patch reviewed.
As I'm not familiar with C and the PostgreSQL source code, I'm not
reviewing this patch myself, but
I applied this patch to my local PostgreSQL and the Active Record unit
tests ran successfully.
--
Yasuo Honda
On Tue, Jul 04, 2023 at 09:02:56PM +0200, Dmitry Dolgov wrote:
On Mon, Jul 03, 2023 at 09:46:11PM -0700, Nathan Bossart wrote:
IMHO this adds way too much complexity to something that most users would
expect to be an on/off switch.
This documentation is exclusively to be precise about how does it work.
Users don't have to worry about all this, and pretty much turn it
on/off, as you've described. I agree though, I could probably write this
text a bit differently.
FWIW, I am going to side with Nathan on this one, but not completely
either. I was looking at the patch and it brings too much complexity
for a monitoring feature in this code path. In my experience, I've
seen people complain about IN/ANY never trimmed down to a single
parameter in pg_stat_statements but I still have to hear from somebody
outside this thread that they'd like to reduce an IN clause depending
on the number of items, or something else.
If I understand Álvaro's suggestion [0] correctly, he's saying that in
addition to allowing "on" and "off", it might be worth allowing
something like "powers" to yield roughly the behavior described above.
I don't think he's suggesting that this "powers" behavior should be
the only available option.
Independently of what Álvaro was suggesting, I find the "powers"
approach more suitable, because it answers my own concerns about the
previous implementation. Having "on"/"off" values means we would have to
scratch heads coming up with a one-size-fit-all default value, or to
introduce another option for the actual cut-off threshold. I would like
to avoid both of those options, that's why I went with "powers" only.
Now, it doesn't mean that this approach with the "powers" will never
happen, but based on the set of opinions I am gathering on this thread
I would suggest to rework the patch as follows:
- First implement an on/off switch that reduces the lists in IN and/or
ANY to one parameter. Simply.
- Second, refactor the powers routine.
- Third, extend the on/off switch, or just implement a threshold with
a second switch.
When it comes to my opinion, I am not seeing any objections to the
feature as a whole, and I'm OK with the first step. I'm also OK to
keep the door open for more improvements in controlling how these
IN/ANY lists show up, but there could be more than just the number of
items as parameter (say the query size, different behaviors depending
on the number of clauses in queries, subquery context or CTEs/WITH,
etc. just to name a few things coming in mind).
--
Michael
On Fri, Oct 13, 2023 at 05:07:00PM +0900, Michael Paquier wrote:
Now, it doesn't mean that this approach with the "powers" will never
happen, but based on the set of opinions I am gathering on this thread
I would suggest to rework the patch as follows:
- First implement an on/off switch that reduces the lists in IN and/or
ANY to one parameter. Simply.
- Second, refactor the powers routine.
- Third, extend the on/off switch, or just implement a threshold with
a second switch.
Well, if it will help move this patch forward, why not. To clarify, I'm
going to split the current implementation into three patches, one for
each point you've mentioned.
When it comes to my opinion, I am not seeing any objections to the
feature as a whole, and I'm OK with the first step. I'm also OK to
keep the door open for more improvements in controlling how these
IN/ANY lists show up, but there could be more than just the number of
items as parameter (say the query size, different behaviors depending
on the number of clauses in queries, subquery context or CTEs/WITH,
etc. just to name a few things coming in mind).
Interesting point, but now it's my turn to have troubles imagining the
case, where list representation could be controlled depending on
something else than the number of elements in it. Do you have any
specific example in mind?
On Tue, Jul 04, 2023 at 09:02:56PM +0200, Dmitry Dolgov wrote:
On Mon, Jul 03, 2023 at 09:46:11PM -0700, Nathan Bossart wrote:
Also, it seems counterintuitive that queries with fewer than 10
constants are not merged.
Why? What would be your intuition using this feature?
For the "powers" setting, I would've expected queries with 0-9 constants to
be merged. Then 10-99, 100-999, 1000-9999, etc. I suppose there might be
an argument for separating 0 from 1-9, too.
--
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
On Fri, Oct 13, 2023 at 03:35:19PM +0200, Dmitry Dolgov wrote:
On Fri, Oct 13, 2023 at 05:07:00PM +0900, Michael Paquier wrote:
Now, it doesn't mean that this approach with the "powers" will never
happen, but based on the set of opinions I am gathering on this thread
I would suggest to rework the patch as follows:
- First implement an on/off switch that reduces the lists in IN and/or
ANY to one parameter. Simply.
- Second, refactor the powers routine.
- Third, extend the on/off switch, or just implement a threshold with
a second switch.
Well, if it will help move this patch forward, why not. To clarify, I'm
going to split the current implementation into three patches, one for
each point you've mentioned.
Here is what I had in mind. The first patch implements the basic notion of
merging, and I guess everyone agrees on its usefulness. The second and
third implement merging into groups by powers of 10, which I still find
useful as well. The last one adds a lower threshold for merging on top
of that. My intentions are to get the first one in, ideally I would love
to see the second and third applied as well.
When it comes to my opinion, I am not seeing any objections to the
feature as a whole, and I'm OK with the first step. I'm also OK to
keep the door open for more improvements in controlling how these
IN/ANY lists show up, but there could be more than just the number of
items as parameter (say the query size, different behaviors depending
on the number of clauses in queries, subquery context or CTEs/WITH,
etc. just to name a few things coming in mind).
Interesting point, but now it's my turn to have troubles imagining the
case, where list representation could be controlled depending on
something else than the number of elements in it. Do you have any
specific example in mind?
In the current patch version I didn't add anything yet to address the
question of having more parameters to tune constants merging. The main
obstacle as I see it is that the information for that has to be
collected when jumbling various query nodes. Anything except information
about the ArrayExpr itself would have to be acquired when jumbling some
other parts of the query, not directly related to the ArrayExpr. It
seems to me this interdependency between otherwise unrelated nodes
outweighs the value it brings, and would require some more elaborate (and
more invasive for the purpose of this patch) mechanism to implement.
Attachments:
v15-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From 4ad001ab9866467c90d5b1ddc664bfe1b6b87ee4 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 15 Oct 2023 10:06:09 +0200
Subject: [PATCH v15 3/4] Merge constants in ArrayExpr into groups
Using query_id_const_merge only first/last element in an ArrayExpr will
be used to compute query id. Extend this to take into account number of
elements, and merge constants into groups based on it. Resulting groups
are powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 17 +++-
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/config.sgml | 9 +-
doc/src/sgml/pgstatstatements.sgml | 2 +-
src/backend/nodes/queryjumblefuncs.c | 52 ++++++++----
src/include/nodes/queryjumble.h | 7 +-
7 files changed, 142 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 7711572e0b7..1bb75a93045 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
---------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset();
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0b03009a053..24d7401257d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2697,6 +2697,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
last_tok_len = 0; /* Length (in bytes) of that tok */
bool skip = false; /* Signals that certain constants are
merged together and have to be skipped */
+ int magnitude; /* Order of magnitute for number of merged
+ constants */
/*
@@ -2737,7 +2739,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2753,12 +2756,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/* The firsts merged constant */
else if (!skip)
{
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* Skip the following until a non merged constant appear */
skip = true;
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 02266a92ce8..75960159a1d 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset();
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 55b0795bce1..4fd997284c9 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8205,15 +8205,16 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
The parameter could be used to reduce amount of repeating data stored
via <xref linkend="pgstatstatements"/> extension. The default value is off.
The <xref linkend="pgstatstatements"/> extension will represent such
- queries in form <literal>'(...)'</literal>.
+ queries in form <literal>'(... [10-99 entries])'</literal>.
</para>
</listitem>
</varlistentry>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 8df4064cbf4..1f6e6a0e76b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -559,7 +559,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 52d45c2b093..b198fe2579e 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -51,7 +53,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
@@ -193,12 +195,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitute value specifies order of magnitute (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -214,7 +219,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
@@ -224,24 +229,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NULL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -255,24 +262,24 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -290,10 +297,19 @@ _jumbleArrayExpr(JumbleState *jstate, Node *node)
{
ArrayExpr *expr = (ArrayExpr *) node;
Const *first, *last;
- if(IsMergeableConstList(expr->elements, &first, &last))
+ int magnitude = IsMergeableConstList(expr->elements, &first, &last);
+
+ if (magnitude)
{
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitute behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 225957d7b3c..cfa99efc14e 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -27,9 +27,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of manitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
--
2.41.0
v15-0004-Add-query_id_const_merge_threshold.patchtext/x-diff; charset=us-asciiDownload
From e119d74ddd5cd5550246213fc66cd75cae606571 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 16 Oct 2023 16:52:27 +0200
Subject: [PATCH v15 4/4] Add query_id_const_merge_threshold
Extend query_id_const_merge to allow merging only if the number of
elements is larger than specified value, which could be configured using
new GUC query_id_const_merge_threshold.
---
.../pg_stat_statements/expected/merging.out | 64 +++++++++++++++++++
contrib/pg_stat_statements/sql/merging.sql | 17 +++++
doc/src/sgml/config.sgml | 15 +++++
doc/src/sgml/pgstatstatements.sgml | 6 ++
src/backend/nodes/queryjumblefuncs.c | 12 +++-
src/backend/utils/misc/guc_tables.c | 11 ++++
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/include/nodes/queryjumble.h | 1 +
8 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1bb75a93045..81811b2db0d 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 75960159a1d..1dc0fef9984 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4fd997284c9..c31d4806c1c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8219,6 +8219,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-const-merge-threshold" xreflabel="query_id_const_merge_threshold">
+ <term><varname>query_id_const_merge_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>query_id_const_merge_threshold</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ If the <xref linkend="guc-query-id-const-merge"/> parameter is enabled,
+ specifies the minimal number of elements an array must have for
+ constants merging to be applied. The default value is 1.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 1f6e6a0e76b..eb00c1ecd2a 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -566,6 +566,12 @@ query | SELECT pg_stat_statements_reset()
calls | 1
</screen>
+ Such constants merging can be configured to apply only starting from a
+ certain number of constants in the array. The threshold is specified using
+ <xref linkend="guc-query-id-const-merge-threshold"/>.
+ </para>
+
+ <para>
But there is a small chance of hash collisions causing unrelated queries to
be merged into one entry. (This cannot happen for queries belonging to
different users or databases, however.)
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b198fe2579e..5d3acf1115c 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -47,6 +47,9 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO;
/* Whether to merge constants in a list when computing query_id */
bool query_id_const_merge = false;
+/* Minimal list length (inclusive) for merging constants when computing query_id */
+int query_id_const_merge_threshold = 1;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -227,7 +230,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains at least
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -251,6 +255,12 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* Fewer elements than the threshold, jumble them one by one */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index fbb50adb9f9..ee239ae7302 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3518,6 +3518,17 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets lower threshold for an array length to apply"
+ " constants merging when computing query identifier."),
+ gettext_noop("Not used if query_id_const_merge is disabled"),
+ },
+ &query_id_const_merge_threshold,
+ 1, 1, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 3397780dc72..6cc30e116c4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -629,6 +629,7 @@
#log_planner_stats = off
#log_executor_stats = off
#query_id_const_merge = off
+#query_id_const_merge_threshold = 1
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index cfa99efc14e..8f823c3c491 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -72,6 +72,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
--
2.41.0
v15-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 4367571d3d886a14690ba31ad0163d0d309a52a3 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 14 Oct 2023 15:00:48 +0200
Subject: [PATCH v15 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
GUC query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 34 +++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/config.sgml | 28 +++
doc/src/sgml/pgstatstatements.sgml | 25 ++-
src/backend/nodes/gen_node_support.pl | 2 +-
src/backend/nodes/queryjumblefuncs.c | 86 ++++++++-
src/backend/utils/misc/guc_tables.c | 10 ++
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/primnodes.h | 2 +
src/include/nodes/queryjumble.h | 9 +-
13 files changed, 406 insertions(+), 20 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index eba4a95d91a..af731fc9a58 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal cleanup oldextversions
+ user_activity wal cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..7711572e0b7
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 15b7c7f2b02..6371c81e138 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -51,6 +51,7 @@ tests += {
'wal',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a46f2db352b..0b03009a053 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2695,6 +2695,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2718,7 +2721,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2733,12 +2735,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..02266a92ce8
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3839c72c868..55b0795bce1 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8190,6 +8190,34 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-const-merge" xreflabel="query_id_const_merge">
+ <term><varname>query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/> extension. The default value is off.
+
+ The <xref linkend="pgstatstatements"/> extension will represent such
+ queries in form <literal>'(...)'</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 7e7c5c9ff82..8df4064cbf4 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -548,10 +548,27 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, or if
+ <xref linkend="guc-query-id-const-merge"/> is enabled and the only
+ difference between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 72c79635781..5b4a3f37a2b 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1308,7 +1308,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 281907a4d83..52d45c2b093 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,12 +42,16 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
@@ -186,11 +190,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +214,65 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list contains could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +285,22 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+ ArrayExpr *expr = (ArrayExpr *) node;
+ Const *first, *last;
+ if(IsMergeableConstList(expr->elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ JUMBLE_NODE(elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 4c585741661..fbb50adb9f9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2011,6 +2011,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_const_merge", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets whether an array of constants will contribute to"
+ " query identifier computation."),
+ },
+ &query_id_const_merge,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d08d55c3fe4..3397780dc72 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -628,7 +628,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#query_id_const_merge = off
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b4..fde0320b263 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1296,6 +1296,8 @@ typedef struct CaseTestExpr
*/
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Expr xpr;
/* type of expression result */
Oid array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 7649e095aa5..225957d7b3c 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUMBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -61,7 +68,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
base-commit: 22655aa23132a0645fdcdce4b233a1fff0c0cf8f
--
2.41.0
v15-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 0eadf14630fa0e888cdd27e41a091c6564a4637c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v15 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index a597e5ed796..02fe132f285 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
On Tue, Oct 17, 2023 at 10:15:41AM +0200, Dmitry Dolgov wrote:
In the current patch version I didn't add anything yet to address the
question of having more parameters to tune constants merging. The main
obstacle as I see it is that the information for that has to be
collected when jumbling various query nodes. Anything except information
about the ArrayExpr itself would have to be acquired when jumbling some
other parts of the query, not directly related to the ArrayExpr. It
seems to me this interdependency between otherwise unrelated nodes
outweighs the value it brings, and would require some more elaborate (and
more invasive for the purpose of this patch) mechanism to implement.
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Hmm. I am not sure that this is the best approach
implementation-wise. Wouldn't it be better to invent a new
pg_node_attr (these can include parameters as well!), say
query_jumble_merge or query_jumble_agg_location that aggregates all
the parameters of a list to be considered as a single element. To put
it short, we could also apply the same property to other parts of a
parsed tree, and not only an ArrayExpr's list.
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
Not much of a fan of this addition either, for an in-core GUC. I would
suggest pushing the GUC layer to pg_stat_statements, maintaining the
computation method to use as a field of JumbleState as I suspect that
this is something we should not enforce system-wide, but at
extension-level instead.
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
Not sure that this is the best thing to do either. Instead of this
extra boolean flag, could it be simpler if we switch LocationLen so that
we track the start position and the end position of a constant in a
query string, so that we'd use one LocationLen for a whole set of Const
nodes in an ArrayExpr? Perhaps this could just be a refactoring piece
of its own?
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
This path should be benchmarked, IMO.
--
Michael
On Thu, Oct 26, 2023 at 09:08:42AM +0900, Michael Paquier wrote:
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Hmm. I am not sure that this is the best approach
implementation-wise. Wouldn't it be better to invent a new
pg_node_attr (these can include parameters as well!), say
query_jumble_merge or query_jumble_agg_location that aggregates all
the parameters of a list to be considered as a single element. To put
it short, we could also apply the same property to other parts of a
parsed tree, and not only an ArrayExpr's list.
Sounds like an interesting idea, something like:
typedef struct ArrayExpr
{
...
List *elements pg_node_attr(query_jumble_merge);
to replace the simple JUMBLE_NODE(elements) with more elaborate logic.
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
Not much a fan of this addition as well for an in-core GUC. I would
suggest pushing the GUC layer to pg_stat_statements, maintaining the
computation method to use as a field of JumbleState as I suspect that
this is something we should not enforce system-wide, but at
extension-level instead.
I also do not particularly like an extra GUC here, but as far as I can
tell, to make it a pg_stat_statements-only GUC it has to be something
similar to EnableQueryId (e.g. EnableQueryConstMerging) that will be
called from pgss. Does this sound better?
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
Not sure that this is the best thing to do either. Instead of this
extra boolean flag, could it be simpler if we switch LocationLen so as
we track the start position and the end position of a constant in a
query string, so as we'd use one LocationLen for a whole set of Const
nodes in an ArrayExpr? Perhaps this could just be a refactoring piece
of its own?
Sounds interesting as well, but it seems to me there is a catch. I'll
try to elaborate, bear with me:
* if the start and the end positions of a constant mean the first and the
last characters representing it, we need the textual length of the
constant in the query to be able to construct such a LocationLen. The
lengths are calculated in pg_stat_statements later, not in JumbleQuery,
and it uses parser for that. Doing all of this in JumbleQuery doesn't
sound reasonable to me.
* if instead we talk about the start and the end positions in a
set of constants, that would mean locations of the first and the last
constants in the set, and everything seems fine. But for such
LocationLen to represent a single constant (not a set of constants), it
means that only the start position would be meaningful, the end position
will not be used.
The second approach comes close to being simpler than the merge flag,
but introduces some ugliness for the single-constant case. What do you
think about this?
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
This path should be benchmarked, IMO.
I can do some benchmarking here, but of course it's going to be slower
than the baseline. The main idea behind the patch is to trade this
overhead for the benefits in the future while processing pgss records,
hoping that it's going to be worth it (and in those extreme cases I'm
trying to address it's definitely worth it).
On Fri, Oct 27, 2023 at 05:02:44PM +0200, Dmitry Dolgov wrote:
On Thu, Oct 26, 2023 at 09:08:42AM +0900, Michael Paquier wrote:
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Hmm. I am not sure that this is the best approach
implementation-wise. Wouldn't it be better to invent a new
pg_node_attr (these can include parameters as well!), say
query_jumble_merge or query_jumble_agg_location that aggregates all
the parameters of a list to be considered as a single element. To put
it short, we could also apply the same property to other parts of a
parsed tree, and not only an ArrayExpr's list.
Sounds like an interesting idea, something like:
typedef struct ArrayExpr
{
...
List *elements pg_node_attr(query_jumble_merge);
to replace simple JUMBLE_NODE(elements) with more elaborated logic.
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
Not much a fan of this addition as well for an in-core GUC. I would
suggest pushing the GUC layer to pg_stat_statements, maintaining the
computation method to use as a field of JumbleState as I suspect that
this is something we should not enforce system-wide, but at
extension-level instead.
I also do not particularly like an extra GUC here, but as far as I can
tell to make it pg_stat_statements GUC only it has to be something
similar to EnableQueryId (e.g. EnableQueryConstMerging), that will be
called from pgss. Does this sound better?
For clarity, here is what I had in mind for those two points.
Attachments:
v16-0004-Introduce-query_id_const_merge_threshold.patchtext/x-diff; charset=us-asciiDownload
From 12a515de2950a8d78e7f19e08b76aedc20a3433f Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 16 Oct 2023 16:52:27 +0200
Subject: [PATCH v16 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is at least the specified value, which can be
configured using the pg_stat_statements parameter query_id_const_merge_threshold.
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 19 +++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/postmaster.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 135 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 7400870f3f6..93d59149bf0 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset();
pg_stat_statements_reset
--------------------------
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a5702c3d749..e02171c6767 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -266,8 +266,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -296,8 +296,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -2956,10 +2958,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index c515e48d50c..52ee4fcb216 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset();
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index a919696abc2..81e6ab2c0c8 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -549,11 +549,11 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
only for semantically equivalent queries, or if
- <xref linkend="guc-query-id-const-merge"/> is enabled and the only
+ <xref linkend="guc-query-id-const-merge-threshold"/> is greater than 0 and the only
difference between queries is the length of an array with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -881,9 +881,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -895,11 +895,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with at least
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index a1d4567ca66..10be62f1331 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -159,12 +159,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -240,7 +242,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -258,12 +261,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NULL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 3e5c43ede81..3094d54bab8 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -528,7 +528,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6076,7 +6076,7 @@ save_backend_variables(BackendParameters *param, Port *port,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6308,7 +6308,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8ee2e9afbb6..a9f8cfcbed9 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -74,10 +74,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.41.0
v16-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From 760420fc4aeb96c20d99ae205ee57670d73dc27b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 14 Oct 2023 15:00:48 +0200
Subject: [PATCH v16 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 62 ++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 54 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 100 ++++++++++-
src/backend/postmaster/postmaster.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
12 files changed, 456 insertions(+), 24 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index eba4a95d91a..af731fc9a58 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal cleanup oldextversions
+ user_activity wal cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..f286c735a36
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 15b7c7f2b02..6371c81e138 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -51,6 +51,7 @@ tests += {
'wal',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a46f2db352b..4d47a746670 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -266,6 +266,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -293,7 +296,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -455,8 +459,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2695,6 +2712,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2718,7 +2738,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2733,12 +2752,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The first merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
@@ -2902,3 +2941,12 @@ comp_location(const void *a, const void *b)
else
return 0;
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..8b589135daa
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 7e7c5c9ff82..bba8e5e11ed 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -548,10 +548,27 @@
<para>
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, or if
+ <xref linkend="guc-query-id-const-merge"/> is enabled and the only
+ difference between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -861,6 +878,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 72c79635781..9eb1f2dde77 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'int' && $f =~ 'location$')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 281907a4d83..4bc16dde6a0 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,13 +42,18 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -148,6 +153,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -186,11 +203,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +227,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +300,21 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if(IsMergeableConstList(elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 9cb624eab81..3e5c43ede81 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -528,6 +528,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6075,6 +6076,7 @@ save_backend_variables(BackendParameters *param, Port *port,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6306,6 +6308,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d08d55c3fe4..97a8251bb2d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -629,7 +629,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b4..0b50e20fa69 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1304,7 +1304,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 7649e095aa5..c64a007ad3f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 22655aa23132a0645fdcdce4b233a1fff0c0cf8f
--
2.41.0
v16-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 72ed59a704b67a0b26b860a34311df9d37851a13 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v16 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index a597e5ed796..02fe132f285 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
v16-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From 0b6f74eecd2a6ec0fe7361c7933f28cfffb30a18 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 15 Oct 2023 10:06:09 +0200
Subject: [PATCH v16 3/4] Merge constants in ArrayExpr into groups
Using query_id_const_merge only first/last element in an ArrayExpr will
be used to compute query id. Extend this to take into account number of
elements, and merge constants into groups based on it. Resulting groups
are powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 17 +++-
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 52 ++++++++----
src/include/nodes/queryjumble.h | 7 +-
6 files changed, 142 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index f286c735a36..7400870f3f6 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
---------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset();
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 4d47a746670..a5702c3d749 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2714,6 +2714,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
last_tok_len = 0; /* Length (in bytes) of that tok */
bool skip = false; /* Signals that certain constants are
merged together and have to be skipped */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2754,7 +2756,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2770,12 +2773,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/* The first merged constant */
else if (!skip)
{
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* Skip the following until a non merged constant appear */
skip = true;
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 8b589135daa..c515e48d50c 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset();
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index bba8e5e11ed..a919696abc2 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -559,7 +559,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -896,10 +896,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurrences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 4bc16dde6a0..a1d4567ca66 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -51,7 +53,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -206,12 +208,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies the order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -227,7 +232,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
@@ -237,24 +242,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NULL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -268,26 +275,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -304,10 +311,19 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if(IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitude behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index c64a007ad3f..8ee2e9afbb6 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of magnitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
--
2.41.0
On Tue, 31 Oct 2023 at 14:36, Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Fri, Oct 27, 2023 at 05:02:44PM +0200, Dmitry Dolgov wrote:
On Thu, Oct 26, 2023 at 09:08:42AM +0900, Michael Paquier wrote:
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+

Hmm. I am not sure that this is the best approach
implementation-wise. Wouldn't it be better to invent a new
pg_node_attr (these can include parameters as well!), say
query_jumble_merge or query_jumble_agg_location that aggregates all
the parameters of a list to be considered as a single element. To put
it short, we could also apply the same property to other parts of a
parsed tree, and not only an ArrayExpr's list.

Sounds like an interesting idea, something like:
typedef struct ArrayExpr
{
...
List *elements pg_node_attr(query_jumble_merge);

to replace simple JUMBLE_NODE(elements) with more elaborate logic.
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;

Not much a fan of this addition as well for an in-core GUC. I would
suggest pushing the GUC layer to pg_stat_statements, maintaining the
computation method to use as a field of JumbleState as I suspect that
this is something we should not enforce system-wide, but at
extension-level instead.

I also do not particularly like an extra GUC here, but as far as I can
tell to make it pg_stat_statements GUC only it has to be something
similar to EnableQueryId (e.g. EnableQueryConstMerging), that will be
called from pgss. Does this sound better?

For clarity, here is what I had in mind for those two points.
CFBot shows documentation build has failed at [1] with:
[07:44:55.531] time make -s -j${BUILD_JOBS} -C doc
[07:44:57.987] postgres.sgml:572: element xref: validity error : IDREF
attribute linkend references an unknown ID
"guc-query-id-const-merge-threshold"
[07:44:58.179] make[2]: *** [Makefile:70: postgres-full.xml] Error 4
[07:44:58.179] make[2]: *** Deleting file 'postgres-full.xml'
[07:44:58.181] make[1]: *** [Makefile:8: all] Error 2
[07:44:58.182] make: *** [Makefile:16: all] Error 2
[1]: https://cirrus-ci.com/task/6688578378399744
Regards,
Vignesh
On Sat, Jan 06, 2024 at 09:04:54PM +0530, vignesh C wrote:
CFBot shows documentation build has failed at [1] with:
[07:44:55.531] time make -s -j${BUILD_JOBS} -C doc
[07:44:57.987] postgres.sgml:572: element xref: validity error : IDREF
attribute linkend references an unknown ID
"guc-query-id-const-merge-threshold"
[07:44:58.179] make[2]: *** [Makefile:70: postgres-full.xml] Error 4
[07:44:58.179] make[2]: *** Deleting file 'postgres-full.xml'
[07:44:58.181] make[1]: *** [Makefile:8: all] Error 2
[07:44:58.182] make: *** [Makefile:16: all] Error 2
Indeed, after moving the configuration option to pgss I forgot to update
its reference in the docs. Thanks for noticing, will update soon.
On Mon, Jan 08, 2024 at 05:10:20PM +0100, Dmitry Dolgov wrote:
On Sat, Jan 06, 2024 at 09:04:54PM +0530, vignesh C wrote:
CFBot shows documentation build has failed at [1] with:
[07:44:55.531] time make -s -j${BUILD_JOBS} -C doc
[07:44:57.987] postgres.sgml:572: element xref: validity error : IDREF
attribute linkend references an unknown ID
"guc-query-id-const-merge-threshold"
[07:44:58.179] make[2]: *** [Makefile:70: postgres-full.xml] Error 4
[07:44:58.179] make[2]: *** Deleting file 'postgres-full.xml'
[07:44:58.181] make[1]: *** [Makefile:8: all] Error 2
[07:44:58.182] make: *** [Makefile:16: all] Error 2

Indeed, after moving the configuration option to pgss I forgot to update
its reference in the docs. Thanks for noticing, will update soon.
Here is the fixed version.
Attachments:
v17-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) [Download]
From 1b99ffd68de6e82d9bbc45c18153ef965a228e28 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 14 Oct 2023 15:00:48 +0200
Subject: [PATCH v17 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 62 ++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 100 ++++++++++-
src/backend/postmaster/postmaster.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
12 files changed, 458 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index eba4a95d91a..af731fc9a58 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal cleanup oldextversions
+ user_activity wal cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..f286c735a36
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 15b7c7f2b02..6371c81e138 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -51,6 +51,7 @@ tests += {
'wal',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a46f2db352b..4d47a746670 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -266,6 +266,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -293,7 +296,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -455,8 +459,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2695,6 +2712,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2718,7 +2738,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2733,12 +2752,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
@@ -2902,3 +2941,12 @@ comp_location(const void *a, const void *b)
else
return 0;
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..8b589135daa
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 7e7c5c9ff82..c78140f8858 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -547,11 +547,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -861,6 +879,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 72c79635781..9eb1f2dde77 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'int' && $f =~ 'location$')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 281907a4d83..4bc16dde6a0 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,13 +42,18 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -148,6 +153,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -186,11 +203,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +227,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list contains could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +300,21 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if(IsMergeableConstList(elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 9cb624eab81..3e5c43ede81 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -528,6 +528,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6075,6 +6076,7 @@ save_backend_variables(BackendParameters *param, Port *port,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6306,6 +6308,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d08d55c3fe4..97a8251bb2d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -629,7 +629,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b4..0b50e20fa69 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1304,7 +1304,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 7649e095aa5..c64a007ad3f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 22655aa23132a0645fdcdce4b233a1fff0c0cf8f
--
2.41.0
v17-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From a38b19f74c51ec48cade87e4df2b499a84c32823 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v17 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index a597e5ed796..02fe132f285 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
v17-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From 1a452c9c474812b3e670564aa7b5386928478d08 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 15 Oct 2023 10:06:09 +0200
Subject: [PATCH v17 3/4] Merge constants in ArrayExpr into groups
When query_id_const_merge is enabled, only the first and the last element
of an ArrayExpr are used to compute the query id. Extend this to take the
number of elements into account, and merge constants into groups based on it.
The resulting groups are delimited by powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 17 +++-
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 52 ++++++++----
src/include/nodes/queryjumble.h | 7 +-
6 files changed, 142 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index f286c735a36..7400870f3f6 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
---------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset();
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 4d47a746670..a5702c3d749 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2714,6 +2714,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
last_tok_len = 0; /* Length (in bytes) of that tok */
bool skip = false; /* Signals that certain constants are
merged together and have to be skipped */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2754,7 +2756,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2770,12 +2773,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/* The firsts merged constant */
else if (!skip)
{
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* Skip the following until a non merged constant appear */
skip = true;
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 8b589135daa..c515e48d50c 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset();
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index c78140f8858..ff24153c493 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -560,7 +560,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -897,10 +897,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 4bc16dde6a0..a1d4567ca66 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -51,7 +53,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -206,12 +208,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -227,7 +232,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
@@ -237,24 +242,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NULL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -268,26 +275,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -304,10 +311,19 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if(IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitude behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index c64a007ad3f..8ee2e9afbb6 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of magnitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
--
2.41.0
v17-0004-Introduce-query_id_const_merge_threshold.patchtext/x-diff; charset=us-asciiDownload
From 5f1f55547b2bc0380f9e6ee5d729f9eb4801bbb6 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sat, 13 Jan 2024 14:52:39 +0100
Subject: [PATCH v17 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold, so that merging happens only
once the number of elements reaches the specified value, which can be
configured using the pg_stat_statements parameter query_id_const_merge_threshold.
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/postmaster.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 7400870f3f6..93d59149bf0 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset();
pg_stat_statements_reset
--------------------------
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a5702c3d749..e02171c6767 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -266,8 +266,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -296,8 +296,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -2956,10 +2958,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index c515e48d50c..52ee4fcb216 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset();
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index ff24153c493..5545accc560 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -549,12 +549,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -882,9 +882,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -896,11 +896,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with at least
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index a1d4567ca66..10be62f1331 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -159,12 +159,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -240,7 +242,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains at least
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -258,12 +261,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NULL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 3e5c43ede81..3094d54bab8 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -528,7 +528,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6076,7 +6076,7 @@ save_backend_variables(BackendParameters *param, Port *port,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6308,7 +6308,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8ee2e9afbb6..a9f8cfcbed9 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -74,10 +74,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.41.0
2024-01 Commitfest.
Hi, This patch has a CF status of "Needs Review" [1]https://commitfest.postgresql.org/46/2837/, but it seems
there was a CFbot test failure last time it was run [2]https://cirrus-ci.com/task/6688578378399744. Please have a
look and post an updated version if necessary.
======
[1]: https://commitfest.postgresql.org/46/2837/
[2]: https://cirrus-ci.com/task/6688578378399744
Kind Regards,
Peter Smith.
On Mon, Jan 22, 2024 at 05:33:26PM +1100, Peter Smith wrote:
2024-01 Commitfest.
Hi, This patch has a CF status of "Needs Review" [1], but it seems
there was a CFbot test failure last time it was run [2]. Please have a
look and post an updated version if necessary.
======
[1] https://commitfest.postgresql.org/46/2837/
[2] https://cirrus-ci.com/task/6688578378399744
It's the same failing pipeline Vignesh C was talking about above. I've fixed
the issue in the latest patch version, but looks like it wasn't picked
up yet (from what I understand, the latest build for this CF is 8 weeks
old).
Dmitry Dolgov <9erthalion6@gmail.com> writes:
On Mon, Jan 22, 2024 at 05:33:26PM +1100, Peter Smith wrote:
Hi, This patch has a CF status of "Needs Review" [1], but it seems
there was a CFbot test failure last time it was run [2]. Please have a
look and post an updated version if necessary.======
[1] https://commitfest.postgresql.org/46/2837/
[2] https://cirrus-ci.com/task/6688578378399744
It's the same failing pipeline Vignesh C was talking above. I've fixed
the issue in the latest patch version, but looks like it wasn't picked
up yet (from what I understand, the latest build for this CF is 8 weeks
old).
Please notice that at the moment, it's not being tested at all because
of a patch-apply failure -- that's what the little triangular symbol
means. The rest of the display concerns the test results from the
last successfully-applied patch version. (Perhaps that isn't a
particularly great UI design.)
If you click on the triangle you find out
== Applying patches on top of PostgreSQL commit ID b0f0a9432d0b6f53634a96715f2666f6d4ea25a1 ===
=== applying patch ./v17-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
patching file contrib/pg_stat_statements/Makefile
Hunk #1 FAILED at 19.
1 out of 1 hunk FAILED -- saving rejects to file contrib/pg_stat_statements/Makefile.rej
patching file contrib/pg_stat_statements/expected/merging.out
patching file contrib/pg_stat_statements/meson.build
...
regards, tom lane
On Mon, Jan 22, 2024 at 11:35:22AM -0500, Tom Lane wrote:
Dmitry Dolgov <9erthalion6@gmail.com> writes:On Mon, Jan 22, 2024 at 05:33:26PM +1100, Peter Smith wrote:
Hi, This patch has a CF status of "Needs Review" [1], but it seems
there was a CFbot test failure last time it was run [2]. Please have a
look and post an updated version if necessary.======
[1] https://commitfest.postgresql.org/46/2837/
[2] https://cirrus-ci.com/task/6688578378399744It's the same failing pipeline Vignesh C was talking above. I've fixed
the issue in the latest patch version, but looks like it wasn't picked
up yet (from what I understand, the latest build for this CF is 8 weeks
old).Please notice that at the moment, it's not being tested at all because
of a patch-apply failure -- that's what the little triangular symbol
means. The rest of the display concerns the test results from the
last successfully-applied patch version. (Perhaps that isn't a
particularly great UI design.)If you click on the triangle you find out
== Applying patches on top of PostgreSQL commit ID b0f0a9432d0b6f53634a96715f2666f6d4ea25a1 ===
=== applying patch ./v17-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
patching file contrib/pg_stat_statements/Makefile
Hunk #1 FAILED at 19.
1 out of 1 hunk FAILED -- saving rejects to file contrib/pg_stat_statements/Makefile.rej
patching file contrib/pg_stat_statements/expected/merging.out
patching file contrib/pg_stat_statements/meson.build
Oh, I see, thanks. Give me a moment, will fix this.
On Mon, Jan 22, 2024 at 06:07:27PM +0100, Dmitry Dolgov wrote:
Please notice that at the moment, it's not being tested at all because
of a patch-apply failure -- that's what the little triangular symbol
means. The rest of the display concerns the test results from the
last successfully-applied patch version. (Perhaps that isn't a
particularly great UI design.)If you click on the triangle you find out
== Applying patches on top of PostgreSQL commit ID b0f0a9432d0b6f53634a96715f2666f6d4ea25a1 ===
=== applying patch ./v17-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
patching file contrib/pg_stat_statements/Makefile
Hunk #1 FAILED at 19.
1 out of 1 hunk FAILED -- saving rejects to file contrib/pg_stat_statements/Makefile.rej
patching file contrib/pg_stat_statements/expected/merging.out
patching file contrib/pg_stat_statements/meson.build
Oh, I see, thanks. Give me a moment, will fix this.
Here it is.
Attachments:
v18-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=us-asciiDownload
From ac8a7c93fbb72c469ca7128280f52024adb860ab Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 22 Jan 2024 21:11:15 +0100
Subject: [PATCH v18 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 62 ++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 100 ++++++++++-
src/backend/postmaster/postmaster.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
12 files changed, 458 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 414a30856e4..03a62d685f3 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal entry_timestamp cleanup oldextversions
+ user_activity wal entry_timestamp cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..1e58283afe8
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 9bfc9657e1a..6bd62f92b9a 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -52,6 +52,7 @@ tests += {
'entry_timestamp',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 8c6a3a2d087..09912f61b7d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -264,6 +264,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -291,7 +294,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -454,8 +458,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2807,6 +2824,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2830,7 +2850,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2845,12 +2864,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The first merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appears */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
@@ -3014,3 +3053,12 @@ comp_location(const void *a, const void *b)
else
return 0;
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..71985bb1cd9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 44dd4db7ce7..2b4c142f647 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -603,11 +603,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -932,6 +950,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 2f0a59bc874..8e79bb3001a 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'int' && $f =~ 'location$')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e489bfceb56..45717553b5b 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,13 +42,18 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -148,6 +153,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -186,11 +203,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -206,15 +227,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +300,21 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if(IsMergeableConstList(elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index feb471dd1df..367ea016fb3 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -533,6 +533,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6112,6 +6113,7 @@ save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *w
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6357,6 +6359,7 @@ restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorke
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index da10b43dac3..450e2144901 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -638,7 +638,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4a154606d2b..fcbb8550189 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1312,7 +1312,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067f..8daf0725d7f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 0eb23285a2579591c09a591e5a52829f65665341
--
2.41.0
v18-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 1a1e12c10e7136b4ff5ce9c9ef8f90c541e21ead Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v18 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 50 +-----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 49 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 24d4c0e29e9..dddd5dc104d 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
#include <limits.h>
#include <ctype.h>
-#include "common/int.h"
#include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
v18-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From 4df7f67e4ae8737c4c708a44af6f598bb5af827a Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 22 Jan 2024 21:20:45 +0100
Subject: [PATCH v18 3/4] Merge constants in ArrayExpr into groups
Using query_id_const_merge only first/last element in an ArrayExpr will
be used to compute query id. Extend this to take into account number of
elements, and merge constants into groups based on it. Resulting groups
are powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 17 +++-
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 52 ++++++++----
src/include/nodes/queryjumble.h | 7 +-
6 files changed, 142 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe8..0cb4f67b8b7 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 09912f61b7d..bb8c1894e05 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2826,6 +2826,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
last_tok_len = 0; /* Length (in bytes) of that tok */
bool skip = false; /* Signals that certain constants are
merged together and have to be skipped */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2866,7 +2868,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2882,12 +2885,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/* The firsts merged constant */
else if (!skip)
{
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* Skip the following until a non merged constant appear */
skip = true;
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd9..657044faded 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 2b4c142f647..bac029430be 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -616,7 +616,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -968,10 +968,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurrences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 45717553b5b..7db7ae7f2ee 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -51,7 +53,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -206,12 +208,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -227,7 +232,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
@@ -237,24 +242,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NULL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -268,26 +275,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -304,10 +311,19 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if(IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitude behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7f..551555494e0 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of magnitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
--
2.41.0
v18-0004-Introduce-query_id_const_merge_threshold.patch (text/x-diff; charset=us-ascii)
From 8e48d013e7d6580b4eda4f999b4b078380d43dcb Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 22 Jan 2024 21:31:18 +0100
Subject: [PATCH v18 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than specified value, which could be
configured using pg_stat_statements parameter query_id_const_merge_threshold.
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/postmaster.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b7..552e248ff14 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bb8c1894e05..6b438aeeeb9 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -264,8 +264,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -294,8 +294,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -458,20 +458,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3068,10 +3070,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044faded..fedeb35b8f5 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index bac029430be..3a060935bff 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -953,9 +953,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -967,11 +967,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with more than
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 7db7ae7f2ee..1884e91de49 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -159,12 +159,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -240,7 +242,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -258,12 +261,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NULL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 367ea016fb3..78de378b4a9 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -533,7 +533,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -6113,7 +6113,7 @@ save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *w
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -6359,7 +6359,7 @@ restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorke
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 551555494e0..ae5907aed84 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -74,10 +74,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.41.0
Hi, I'm interested in this feature. It looks like these patches have
some conflicts.
http://cfbot.cputube.org/patch_47_2837.log
Would you rebase these patches?
Thanks,
--
Yasuo Honda
On Sat, Mar 23, 2024 at 4:11 PM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
Oh, I see, thanks. Give me a moment, will fix this.
Here it is.
On Sat, Mar 23, 2024 at 04:13:44PM +0900, Yasuo Honda wrote:
Hi, I'm interested in this feature. It looks like these patches have
some conflicts.
http://cfbot.cputube.org/patch_47_2837.log
Would you rebase these patches?
Sure, I can rebase, give me a moment. If you don't want to wait, there
is a base commit in the patch, against which it should be applied
without issues, 0eb23285a2.
Thanks for the information. I can apply these 4 patches from
0eb23285a2 . I tested this branch from Ruby on Rails and it gets some
unexpected behavior from my point of view.
Setting pg_stat_statements.query_id_const_merge_threshold = 5 does not
normalize SQL queries whose IN clause contains more than 5 values.
Here are test steps.
https://gist.github.com/yahonda/825ffccc4dcb58aa60e12ce33d25cd45#expected-behavior
It would be appreciated if I can get my understanding correct.
--
Yasuo Honda
On Sun, Mar 24, 2024 at 3:20 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
Sure, I can rebase, give me a moment. If you don't want to wait, there
is a base commit in the patch, against which it should be applied
without issues, 0eb23285a2.
On Sun, Mar 24, 2024 at 11:36:38PM +0900, Yasuo Honda wrote:
Thanks for the information. I can apply these 4 patches from
0eb23285a2 . I tested this branch from Ruby on Rails and it gets some
unexpected behavior from my point of view.
Setting pg_stat_statements.query_id_const_merge_threshold = 5 does not
normalize sql queries whose number of in clauses exceeds 5.
Here are test steps.
https://gist.github.com/yahonda/825ffccc4dcb58aa60e12ce33d25cd45#expected-behavior
It would be appreciated if I can get my understanding correct.
From what I understand out of the description this ruby script uses
prepared statements, passing values as parameters, right? Unfortunately
the current version of the patch doesn't handle that, it works with
constants only [1]. The original incarnation of this feature was able to
handle that, but the implementation was considered to be not suitable --
thus, to make some progress, it was left outside.
The plan is, if everything goes fine at some point, to do a follow-up
patch to handle Params and the rest.
[1]: /messages/by-id/20230211104707.grsicemegr7d3mgh@erthalion.local
Yes. The script uses prepared statements because Ruby on Rails enables
prepared statements by default for PostgreSQL databases.
Then I tested this branch
https://github.com/yahonda/postgres/tree/pg_stat_statements without
using prepared statements as follows and all of them do not normalize
in clause values.
- Disabled prepared statements by setting `prepared_statements: false`
https://gist.github.com/yahonda/2c2d6ac7a955886a305750eecfd07c5e
- Use ruby-pg
https://gist.github.com/yahonda/2f0efb11ae888d8f6b27a07e0b833fdf
- Use psql
https://gist.github.com/yahonda/c830379b33d66a743aef159aa03d7e49
I do not know why, even when I use psql, the query column in
pg_stat_statements looks like a prepared statement: "IN ($1,
$2)".
On Tue, Mar 26, 2024 at 1:35 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
From what I understand out of the description this ruby script uses
prepared statements, passing values as parameters, right? Unfortunately
the current version of the patch doesn't handle that, it works with
constants only [1]. The original incarnation of this feature was able to
handle that, but the implementation was considered to be not suitable --
thus, to make some progress, it was left outside.
On Tue, Mar 26, 2024 at 04:21:46PM +0900, Yasuo Honda wrote:
Yes. The script uses prepared statements because Ruby on Rails enables
prepared statements by default for PostgreSQL databases.
Then I tested this branch
https://github.com/yahonda/postgres/tree/pg_stat_statements without
using prepared statements as follows and all of them do not normalize
in clause values.
- Disabled prepared statements by setting `prepared_statements: false`
https://gist.github.com/yahonda/2c2d6ac7a955886a305750eecfd07c5e
- Use ruby-pg
https://gist.github.com/yahonda/2f0efb11ae888d8f6b27a07e0b833fdf
- Use psql
https://gist.github.com/yahonda/c830379b33d66a743aef159aa03d7e49
I do not know why even if I use psql, the query column at
pg_stat_sql_statement shows it is like a prepared statement "IN ($1,
$2)".
It's a similar case: the column is defined as bigint, thus PostgreSQL
has to wrap every constant expression in a function expression that
converts its type to bigint. The current patch version doesn't try to
reduce a FuncExpr into Const (even if the wrapped value is a Const),
thus this array is not getting merged. If you replace bigint with an
int, no type conversion would be required and merging logic will kick
in.
Again, the original version of the patch was able to handle this case,
but it was stripped away to make the patch smaller in hope of moving
forward. Anyway, thanks for reminding about how annoying the current
handling of constant arrays can look like in practice!
Thanks for the useful info.
Ruby on Rails uses bigint as a default data type for the primary key
and prepared statements have been enabled by default for PostgreSQL.
I'm looking forward to these current patches being merged as a first
step and future versions of pg_stat_statements will support
normalizing bigint and prepared statements.
On Wed, Mar 27, 2024 at 6:00 AM Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Show quoted text
It's a similar case: the column is defined as bigint, thus PostgreSQL
has to wrap every constant expression in a function expression that
converts its type to bigint. The current patch version doesn't try to
reduce a FuncExpr into Const (even if the wrapped value is a Const),
thus this array is not getting merged. If you replace bigint with an
int, no type conversion would be required and merging logic will kick
in.
Again, the original version of the patch was able to handle this case,
but it was stripped away to make the patch smaller in hope of moving
forward. Anyway, thanks for reminding about how annoying the current
handling of constant arrays can look like in practice!
On Wed, Mar 27, 2024 at 08:56:12AM +0900, Yasuo Honda wrote:
Thanks for the useful info.
Ruby on Rails uses bigint as a default data type for the primary key
and prepared statements have been enabled by default for PostgreSQL.
I'm looking forward to these current patches being merged as a first
step and future versions of pg_stat_statements will support
normalizing bigint and prepared statements.
Here is the rebased version. In the meantime I'm going to experiment
with how to support more use cases in a way that will be more acceptable
for the community.
Attachments:
v19-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii)
From 21d6e88cc9594745e3b88938be1547aa526b2a29 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:02:51 +0200
Subject: [PATCH v19 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 62 ++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 100 ++++++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
12 files changed, 458 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 414a30856e4..03a62d685f3 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal entry_timestamp cleanup oldextversions
+ user_activity wal entry_timestamp cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..1e58283afe8
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 9bfc9657e1a..6bd62f92b9a 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -52,6 +52,7 @@ tests += {
'entry_timestamp',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 67cec865ba1..d7841b51cc9 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,6 +265,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -292,7 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -455,8 +459,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2808,6 +2825,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2831,7 +2851,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2846,12 +2865,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The first merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appears */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away */
quer_loc = off + tok_len;
last_off = off;
@@ -3010,3 +3049,12 @@ comp_location(const void *a, const void *b)
return pg_cmp_s32(l, r);
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..71985bb1cd9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 44dd4db7ce7..2b4c142f647 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -603,11 +603,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -932,6 +950,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index d4244facbb3..021073151b2 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index be823a7f8fa..e9473def361 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +56,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -154,6 +159,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -192,11 +209,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -212,15 +233,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify whether the provided list can be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * first and last constants are handed back to the caller.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -233,6 +306,21 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if(IsMergeableConstList(elements, &first, &last))
+ {
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index cb0c3e2f8ab..5bb2f00c6ef 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,6 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -742,6 +743,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1000,6 +1002,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index baecde28410..cdb5bea780d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -651,7 +651,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index aa727e722cc..cf4f900d4ed 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1333,7 +1333,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067f..8daf0725d7f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: c9920a9068eac2e6c8fb34988d18c0b42b9bf811
--
2.41.0
v19-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 439c3c3dbbf59d7b3ed204178de9c34a60734756 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:08 +0200
Subject: [PATCH v19 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
---
src/backend/utils/adt/numutils.c | 48 +----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 47 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cba..adfad9f89fa 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -20,6 +20,7 @@
#include "port/pg_bitutils.h"
#include "utils/builtins.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -37,53 +38,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
v19-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From 401184675e25ef7f314cd9ac7c6986d4e18148d9 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 22 Jan 2024 21:20:45 +0100
Subject: [PATCH v19 3/4] Merge constants in ArrayExpr into groups
With query_id_const_merge enabled, only the first/last elements of an ArrayExpr
are used to compute the query id. Extend this to take the number of elements
into account, and merge constants into groups based on it. The resulting groups
are bounded by powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 17 +++-
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 52 ++++++++----
src/include/nodes/queryjumble.h | 7 +-
6 files changed, 142 insertions(+), 42 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe8..0cb4f67b8b7 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index d7841b51cc9..00eec30feb1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2827,6 +2827,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
last_tok_len = 0; /* Length (in bytes) of that tok */
bool skip = false; /* Signals that certain constants are
merged together and have to be skipped */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2867,7 +2869,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2883,12 +2886,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/* The first merged constant */
else if (!skip)
{
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
/* Skip the following until a non merged constant appears */
skip = true;
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd9..657044faded 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 2b4c142f647..bac029430be 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -616,7 +616,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -968,10 +968,11 @@ calls | 1
with an array of different length.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurrences of the same query, where the only
+ difference is the number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e9473def361..a27202cb279 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -57,7 +59,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -212,12 +214,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitute value specifies order of magnitute (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -233,7 +238,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
@@ -243,24 +248,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NULL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -274,26 +281,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -310,10 +317,19 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if(IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitute behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7f..551555494e0 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of manitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
--
2.41.0
v19-0004-Introduce-query_id_const_merge_threshold.patch (text/x-diff; charset=us-ascii) Download
From 751999d19472f3427eb8f232ba4501eb8ac1f4d3 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v19 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than specified value, which could be
configured using pg_stat_statements parameter query_id_const_merge_threshold.
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/launch_backend.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b7..552e248ff14 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 00eec30feb1..569a74c4368 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -295,8 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3064,10 +3066,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044faded..fedeb35b8f5 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index bac029430be..3a060935bff 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -953,9 +953,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -967,11 +967,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with more than
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index a27202cb279..66696c87475 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
@@ -165,12 +165,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -246,7 +248,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -264,12 +267,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NULL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 5bb2f00c6ef..84b214a5952 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,7 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -743,7 +743,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1002,7 +1002,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 551555494e0..ae5907aed84 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -74,10 +74,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.41.0
Hi,
In <20240404143514.a26f7ttxrbdfc73a@erthalion.local>
"Re: pg_stat_statements and "IN" conditions" on Thu, 4 Apr 2024 16:35:14 +0200,
Dmitry Dolgov <9erthalion6@gmail.com> wrote:
Here is the rebased version.
Thanks. I'm not familiar with this code base but I've
reviewed these patches because I'm interested in this
feature too.
0001:
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index be823a7f8fa..e9473def361 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -212,15 +233,67 @@ RecordConstLocation(JumbleState *jstate, int location)
...
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NULL)
"elements == NIL" would be better for a List.
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if(IsMergeableConstList(elements, &first, &last))
A space is missing between "if" and "(".
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index aa727e722cc..cf4f900d4ed 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1333,7 +1333,7 @@ typedef struct ArrayExpr
 /* common type of array elements */
 Oid element_typeid pg_node_attr(query_jumble_ignore);
 /* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
Should we also update the comment describing pg_node_attr() in
nodes.h to mention query_jumble_merge?
0003:
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index d7841b51cc9..00eec30feb1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
...
@@ -2883,12 +2886,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 /* The firsts merged constant */
 else if (!skip)
 {
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
How about adding a reverse function of decimalLength32() to
numutils.h and using it here?
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
Do we still have enough space in norm_query for this change?
It seems that norm_query expects up to 10 additional bytes
per jstate->clocations[i].
It seems that we could merge 0001, 0003 and 0004 into one patch.
(Sorry, I haven't read the whole discussion yet. If this has already
been discussed, sorry for the noise.)
Thanks,
--
kou
On Mon, Apr 15, 2024 at 06:09:29PM +0900, Sutou Kouhei wrote:
Thanks. I'm not familiar with this code base but I've
reviewed these patches because I'm interested in this
feature too.
Thanks for the review! The commentaries for the first patch make sense
to me, will apply.
0003:
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index d7841b51cc9..00eec30feb1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
...
@@ -2883,12 +2886,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 /* The firsts merged constant */
 else if (!skip)
 {
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
How about adding a reverse function of decimalLength32() to
numutils.h and use it here?
I was pondering that at some point, but eventually decided to keep it
this way, because:
* the use case is quite specific, I can't imagine it being used anywhere
else
* it would not be strictly reverse, as the transformation itself is not
reversible in the strict sense
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
Do we still have enough space in norm_query for this change?
It seems that norm_query expects up to 10 additional bytes
per jstate->clocations[i].
As far as I understand there should be enough space, because we're going
to replace at least 10 constants with one merge record. But it's a good
point, this should be called out in the commentary explaining why 10
additional bytes are added.
It seems that we can merge 0001, 0003 and 0004 to one patch.
(Sorry. I haven't read all discussions yet. If we already
discuss this, sorry for this noise.)
There is a certain disagreement about which portion of this feature
makes sense to go with first, thus I think keeping all options open is a
good idea. In the end a committer can squash the patches if needed.
On Tue, Apr 23, 2024 at 10:18:15AM +0200, Dmitry Dolgov wrote:
On Mon, Apr 15, 2024 at 06:09:29PM +0900, Sutou Kouhei wrote:
Thanks. I'm not familiar with this code base but I've
reviewed these patches because I'm interested in this
feature too.
Thanks for the review! The commentaries for the first patch make sense
to me, will apply.
Here is the new version. It turned out you were right about the memory for
the normalized query: if the number of constants gets close to INT_MAX,
not enough memory was allocated. I've added a fix for this on top
of the applied changes, and also improved the readability of the
pg_stat_statements part.
Attachments:
v20-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=us-ascii) Download
From 324707496d7ec9a71b16f58d8df25e957e41c073 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:02:51 +0200
Subject: [PATCH v20 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 74 +++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 105 ++++++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
13 files changed, 478 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 414a30856e4..03a62d685f3 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
- user_activity wal entry_timestamp cleanup oldextversions
+ user_activity wal entry_timestamp cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..1e58283afe8
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 9bfc9657e1a..6bd62f92b9a 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -52,6 +52,7 @@ tests += {
'entry_timestamp',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 67cec865ba1..0e8d42e2054 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,6 +265,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -292,7 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -455,8 +459,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2808,6 +2825,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2831,7 +2852,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2846,13 +2866,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * the first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
@@ -3010,3 +3061,12 @@ comp_location(const void *a, const void *b)
return pg_cmp_s32(l, r);
}
+
+/*
+ * Assign hook for the pg_stat_statements.query_id_const_merge GUC.
+ *
+ * Notifies query jumbling about the new query_id_const_merge status by
+ * forwarding newvalue to SetQueryIdConstMerge().  The extra argument is not
+ * used.
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..71985bb1cd9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 9b0aff73b1e..cbe1f3e1715 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -603,11 +603,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -938,6 +956,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ This parameter can be used to reduce the amount of repetitive data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 81df3bdf95f..d2a276c303a 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index be823a7f8fa..9e17767100d 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +56,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -154,6 +159,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ *
+ * The value is stored in the query_id_const_merge global, which
+ * IsMergeableConstList() checks before it considers a list for merging.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -192,11 +209,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -212,15 +233,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Check whether the provided list could be merged down, which means it is
+ * non-empty and consists solely of Const nodes.
+ *
+ * Returns true if merging is possible; in that case *firstConst and
+ * *lastConst are set to the first and the last element of the list.  Returns
+ * false, leaving both output arguments untouched, if the list is empty, if
+ * query_id_const_merge is off, or if any element is not a Const.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify that all the elements of
+ * the list are constants as well. If yes, the list is eligible for
+ * merging, and its first and last constants are handed back to the caller.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -233,6 +306,26 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * The list is mergeable, so its elements are not walked via
+ * _jumbleNode() in this branch; only two constant locations are
+ * recorded, both flagged as merged.
+ *
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ */
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 4e9dde1517b..8bd73929183 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,6 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -742,6 +743,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1000,6 +1002,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2166ea4a87a..74a3c406dc2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -651,7 +651,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 855009fd6e2..5c117609394 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow the field's values to be merged during query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 247cecb4b45..6b15c51a6d8 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1347,7 +1347,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067f..8daf0725d7f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 5bcbe9813bf91bcf14ef3a580162f1600dd3d1d4
--
2.41.0
v20-0002-Reusable-decimalLength-functions.patchtext/x-diff; charset=us-asciiDownload
From 1e71323bbeb5d5f2ea6ff06896f58fba5252d14e Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:08 +0200
Subject: [PATCH v20 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
src/backend/utils/adt/numutils.c | 48 +----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 47 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cba..adfad9f89fa 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -20,6 +20,7 @@
#include "port/pg_bitutils.h"
#include "utils/builtins.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -37,53 +38,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Compute the decimal length of v, i.e. the number of base-10 digits it has.
+ *
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ *
+ * NOTE(review): the result for v == 0 depends on what
+ * pg_leftmost_one_pos32() does with a zero argument, which is not visible
+ * here -- confirm that callers handle zero separately.
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ * (multiplying by 1233 / 4096, which is roughly log10(2)).
+ *
+ * The estimate t can be one too small, so the final comparison against
+ * PowersOfTen[t] adds one whenever v has reached that power of ten.
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * 64-bit counterpart of decimalLength32: yields the number of base-10
+ * digits of v.
+ *
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ * (multiplying by 1233 / 4096, which is roughly log10(2)).
+ *
+ * The estimate t can be one too small, so the final comparison against
+ * PowersOfTen[t] adds one whenever v has reached that power of ten.
+ *
+ * NOTE(review): the result for v == 0 depends on what
+ * pg_leftmost_one_pos64() does with a zero argument, which is not visible
+ * here -- confirm that callers handle zero separately.
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.41.0
v20-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/x-diff; charset=us-asciiDownload
From eb7364a5a9ccd476ba0436ad991f95f66efd6a55 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 12 May 2024 11:51:10 +0200
Subject: [PATCH v20 3/4] Merge constants in ArrayExpr into groups
Using query_id_const_merge only first/last element in an ArrayExpr will
be used to compute query id. Extend this to take into account number of
elements, and merge constants into groups based on it. Resulting groups
are powers of 10, i.e. 1 to 9, 10 to 99, etc.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 29 +++++--
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 55 ++++++++----
src/include/nodes/queryjumble.h | 10 ++-
6 files changed, 157 insertions(+), 45 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe8..0cb4f67b8b7 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0e8d42e2054..0ade4cf515f 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2828,6 +2828,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
bool merged_interval = false; /* Currently processed constants
belong to a merged constants
interval. */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2842,8 +2844,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
+ *
+ * On top of that, each pair of $n symbols representing a merged constants
+ * interval will be decorated with the explanatory text, adding 14
+ * bytes.
*/
- norm_query_buflen = query_len + jstate->clocations_count * 10;
+ norm_query_buflen = query_len + jstate->clocations_count * 10 +
+ jstate->clocations_merged_count * 14;
/* Allocate result buffer */
norm_query = palloc(norm_query_buflen + 1);
@@ -2868,7 +2875,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2885,13 +2893,23 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/*
* We are not inside a merged interval yet, which means it is the
* the first merged constant.
- *
+ */
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
+ /*
* A merged constants interval must be represented via two
* constants with the merged flag. Currently we are at the first,
* verify there is another one.
*/
Assert(i + 1 < jstate->clocations_count);
- Assert(jstate->clocations[i + 1].merged);
+ Assert(jstate->clocations[i + 1].magnitude > 0);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2900,7 +2918,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
merged_interval = true;
/* Mark the interval in the normalized query */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away, move forward */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd9..657044faded 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index cbe1f3e1715..12ffd021909 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -616,7 +616,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -974,10 +974,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 9e17767100d..e9ede0acb70 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -57,7 +59,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -123,6 +125,7 @@ JumbleQuery(Query *query)
jstate->clocations = (LocationLen *)
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
+ jstate->clocations_merged_count = 0;
jstate->highest_extern_param_id = 0;
/* Compute query ID and mark the Query node with it */
@@ -212,12 +215,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitute value specifies order of magnitute (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -232,10 +238,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
+ if (magnitude > 0)
+ jstate->clocations_merged_count++;
}
}
@@ -243,24 +251,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NIL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -274,26 +284,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -310,15 +320,24 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if (IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
/*
* Both first and last constants have to be recorded. The first one
* will indicate the merged interval, the last one will tell us the
* length of the interval within the query text.
*/
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitute behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7f..0e69e420b7f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of manitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
@@ -52,6 +55,9 @@ typedef struct JumbleState
/* Current number of valid entries in clocations array */
int clocations_count;
+ /* Current number of entries with merged constants interval */
+ int clocations_merged_count;
+
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} JumbleState;
--
2.41.0
v20-0004-Introduce-query_id_const_merge_threshold.patchtext/x-diff; charset=us-asciiDownload
From a7dc2084e5fa73960756c1a0fbdf3d3e4f335380 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v20 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than specified value, which could be
configured using pg_stat_statements parameter query_id_const_merge_threshold.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/launch_backend.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b7..552e248ff14 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0ade4cf515f..ffa832bbfb0 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -295,8 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3082,10 +3084,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044faded..fedeb35b8f5 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 12ffd021909..c939c316a37 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -959,9 +959,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -973,11 +973,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with more than
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e9ede0acb70..c25cb2be62a 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
@@ -166,12 +166,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -249,7 +251,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -267,12 +270,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NIL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 8bd73929183..491858a116e 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,7 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -743,7 +743,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1002,7 +1002,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7f..90218c60531 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.41.0
Hello
This feature will improve my monitoring. Even in patch 0001. I think there are many other people in the thread who think this is useful. So maybe we should move it forward? Any complaints about the overall design? I see in the discussion it was mentioned that it would be good to measure performance difference.
PS: patch cannot be applied at this time, it needs another rebase.
regards, Sergei
On Sun, Aug 11, 2024 at 07:54:05PM +0300, Sergei Kornilov wrote:
This feature will improve my monitoring. Even in patch 0001. I think there are many other people in the thread who think this is useful. So maybe we should move it forward? Any complaints about the overall design? I see in the discussion it was mentioned that it would be good to measure performance difference.
PS: patch cannot be applied at this time, it needs another rebase.
Yeah, it seems like most people are fine with the first patch and the
simplest approach. I'm going to post a rebased version and a short
thread summary soon.
On Sun, Aug 11, 2024 at 09:34:55PM GMT, Dmitry Dolgov wrote:
On Sun, Aug 11, 2024 at 07:54:05PM +0300, Sergei Kornilov wrote:
This feature will improve my monitoring. Even in patch 0001. I think there are many other people in the thread who think this is useful. So maybe we should move it forward? Any complaints about the overall design? I see in the discussion it was mentioned that it would be good to measure performance difference.
PS: patch cannot be applied at this time, it needs another rebase.
Yeah, it seems like most people are fine with the first patch and the
simplest approach. I'm going to post a rebased version and a short
thread summary soon.
Ok, here is the rebased version. If anyone would like to review them, below is
the short summary of the thread. Currently the patch series contains 4 changes:
* 0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
Implements the simplest way to handle constant arrays: if the array contains
only constants, it will be reduced. This is the basis; if I read it correctly,
Nathan and Michael expressed that they're mostly fine with this one.
Michael seems to be skeptical about the "merged" flag in the LocationLen
struct, but from what I see the proposed alternative has problems as well.
There was also a note that the loop over constants has to be benchmarked, but
it's not entirely clear to me in which dimensions to benchmark (i.e. what
the expectations are). For both I'm waiting on a reply to my questions.
* 0002-Reusable-decimalLength-functions.patch
A small refactoring to make the already existing "powers" functionality
reusable for the following patches.
* 0003-Merge-constants-in-ArrayExpr-into-groups.patch
Makes handling of constant arrays smarter by taking into account the number of
elements in the array. This way records are merged into groups by powers of 10,
i.e. arrays with length 55 will land in the group 10-99, with length 555 in the
group 100-999, etc. This was proposed by Alvaro, and personally I like this
approach, because it remediates the issue of one-size-fits-all for the static
threshold. But there are opinions that this introduces too much complexity.
* 0004-Introduce-query_id_const_merge_threshold.patch
Fine tuning for the previous patch: only arrays with a length over a certain
threshold are reduced.
On top of that Yasuo Honda and Jakub Wartak have provided a couple of practical
examples, where handling of constant arrays has to be improved. David Geier
pointed out some examples that might be confusing as well. All those are
definitely worth addressing, but out of scope of this patch for now.
Attachments:
v21-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From 4a08f85071c24bcf89a17bef782fcaef05a5d6e3 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 12 Aug 2024 20:40:01 +0200
Subject: [PATCH v21 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 74 +++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 105 ++++++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
13 files changed, 478 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index c19ccad77e..79fcb02af9 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges cleanup \
- oldextversions
+ oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..1e58283afe
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 5cf926d1f8..379600e093 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -53,6 +53,7 @@ tests += {
'privileges',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index d4197ae0f7..d267a72e0a 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,6 +265,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -292,7 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -455,8 +459,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2808,6 +2825,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2831,7 +2852,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2846,13 +2866,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
@@ -3010,3 +3061,12 @@ comp_location(const void *a, const void *b)
return pg_cmp_s32(l, r);
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..71985bb1cd
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 9b0aff73b1..cbe1f3e171 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -603,11 +603,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -938,6 +956,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in the form <literal>'(...)'</literal>.
+
+ The parameter can be used to reduce the amount of repeated data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 81df3bdf95..d2a276c303 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 129fb44709..722b064873 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +56,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
@@ -153,6 +158,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -191,11 +208,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -211,15 +232,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify whether the provided list can be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude needs to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -232,6 +305,26 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ */
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index e9fc982787..da3ceceddb 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,6 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -730,6 +731,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -989,6 +991,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 9ec9f97e92..bddf2e1e2e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -651,7 +651,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 0d71d821f7..60461b1fae 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow the field's values to be merged during query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index ea47652adb..20e92e1a71 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1377,7 +1377,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067..8daf0725d7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 284c030a10b838bb016e8c2de56ae9b845a6b30e
--
2.45.1
v21-0002-Reusable-decimalLength-functions.patchtext/plain; charset=us-asciiDownload
From 24231d035a4a9465c554c7c9c67e286757a97204 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:08 +0200
Subject: [PATCH v21 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
src/backend/utils/adt/numutils.c | 48 +----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 47 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..adfad9f89f 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -20,6 +20,7 @@
#include "port/pg_bitutils.h"
#include "utils/builtins.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -37,53 +38,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 0000000000..876e64f2df
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.45.1
v21-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/plain; charset=us-asciiDownload
From acbbb641e8697eaf5ba94ac454cd430a04a6d1e0 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 12 May 2024 11:51:10 +0200
Subject: [PATCH v21 3/4] Merge constants in ArrayExpr into groups
With query_id_const_merge enabled, only the first and last elements of an
ArrayExpr are used to compute the query id. Extend this to take into account
the number of elements, and merge constants into groups based on it. The
resulting groups are powers of 10, i.e. 1 to 9, 10 to 99, etc.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 29 +++++--
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/queryjumblefuncs.c | 55 ++++++++----
src/include/nodes/queryjumble.h | 10 ++-
6 files changed, 157 insertions(+), 45 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe..0cb4f67b8b 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index d267a72e0a..1c35e10117 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2828,6 +2828,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
bool merged_interval = false; /* Currently processed constants
belong to a merged constants
interval. */
+ int magnitude; /* Order of magnitute for number of merged
+ constants */
/*
@@ -2842,8 +2844,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
+ *
+ * On top of that, each pair of $n symbols representing a merged constants
+ * interval will be decorated with the explanatory text, adding 14
+ * bytes.
*/
- norm_query_buflen = query_len + jstate->clocations_count * 10;
+ norm_query_buflen = query_len + jstate->clocations_count * 10 +
+ jstate->clocations_merged_count * 14;
/* Allocate result buffer */
norm_query = palloc(norm_query_buflen + 1);
@@ -2868,7 +2875,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2885,13 +2893,23 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/*
* We are not inside a merged interval yet, which means it is the
* the first merged constant.
- *
+ */
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
+ /*
* A merged constants interval must be represented via two
* constants with the merged flag. Currently we are at the first,
* verify there is another one.
*/
Assert(i + 1 < jstate->clocations_count);
- Assert(jstate->clocations[i + 1].merged);
+ Assert(jstate->clocations[i + 1].magnitude > 0);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2900,7 +2918,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
merged_interval = true;
/* Mark the interval in the normalized query */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away, move forward */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd..657044fade 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index cbe1f3e171..12ffd02190 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -616,7 +616,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -974,10 +974,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 722b064873..1d3f36ca64 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -57,7 +59,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -122,6 +124,7 @@ JumbleQuery(Query *query)
jstate->clocations = (LocationLen *)
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
+ jstate->clocations_merged_count = 0;
jstate->highest_extern_param_id = 0;
/* Compute query ID and mark the Query node with it */
@@ -211,12 +214,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -231,10 +237,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
+ if (magnitude > 0)
+ jstate->clocations_merged_count++;
}
}
@@ -242,24 +250,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NIL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -273,26 +283,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -309,15 +319,24 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if (IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
/*
* Both first and last constants have to be recorded. The first one
* will indicate the merged interval, the last one will tell us the
* length of the interval within the query text.
*/
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitude behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7..0e69e420b7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of magnitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
@@ -52,6 +55,9 @@ typedef struct JumbleState
/* Current number of valid entries in clocations array */
int clocations_count;
+ /* Current number of entries with merged constants interval */
+ int clocations_merged_count;
+
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} JumbleState;
--
2.45.1
v21-0004-Introduce-query_id_const_merge_threshold.patchtext/plain; charset=us-asciiDownload
From 3a6c56094c0c9f428e92e9e4dfbd1746e4689841 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v21 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold, so that merging happens only if
the number of elements is at least the specified value. The threshold is
configured using the pg_stat_statements parameter query_id_const_merge_threshold.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/launch_backend.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b..552e248ff1 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 1c35e10117..ae672fcead 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -295,8 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3082,10 +3084,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044fade..fedeb35b8f 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 12ffd02190..c939c316a3 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -959,9 +959,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -973,11 +973,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with at least
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 1d3f36ca64..37a47072fb 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
@@ -165,12 +165,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, at or above which merging
+ * will be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -248,7 +250,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains at least
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -266,12 +269,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NIL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index da3ceceddb..f8a232b6a2 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,7 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -731,7 +731,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -991,7 +991,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7..90218c6053 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.45.1
On Wed, 14 Aug 2024 at 01:06, Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Sun, Aug 11, 2024 at 09:34:55PM GMT, Dmitry Dolgov wrote:
On Sun, Aug 11, 2024 at 07:54:05PM +0300, Sergei Kornilov wrote:
This feature will improve my monitoring. Even in patch 0001. I think there are many other people in the thread who think this is useful. So maybe we should move it forward? Any complaints about the overall design? I see in the discussion it was mentioned that it would be good to measure performance difference.
PS: patch cannot be applied at this time, it needs another rebase.
Yeah, it seems like most people are fine with the first patch and the
simplest approach. I'm going to post a rebased version and a short
thread summary soon.
Ok, here is the rebased version. If anyone would like to review them, below is
the short summary of the thread. Currently the patch series contains 4 changes:
* 0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch
Implements the simplest way to handle constant arrays, if the array contains
only constants it will be reduced. This is the basis, if I read it correctly
Nathan and Michael expressed that they're mostly fine with this one.
Michael seems to be skeptical about the "merged" flag in the LocationLen
struct, but from what I see the proposed alternative has problems as well.
There was also a note that the loop over constants has to be benchmarked, but
it's not entirely clear for me in which dimensions to benchmark (i.e. what
are the expectations). For both I'm waiting on a reply to my questions.
* 0002-Reusable-decimalLength-functions.patch
A small refactoring to make already existing "powers" functionality reusable
for following patches.
* 0003-Merge-constants-in-ArrayExpr-into-groups.patch
Makes handling of constant arrays smarter by taking into account number of
elements in the array. This way records are merged into groups by powers of 10,
i.e. arrays with length 55 will land in a group 10-99, with length 555 in a
group 100-999 etc. This was proposed by Alvaro, and personally I like this
approach, because it remediates the issue of one-size-fits-all for the static
threshold. But there are opinions that this introduces too much complexity.
* 0004-Introduce-query_id_const_merge_threshold.patch
Fine tuning for the previous patch, makes only arrays with the length over a
certain threshold to be reduced.
On top of that Yasuo Honda and Jakub Wartak have provided a couple of practical
examples, where handling of constant arrays has to be improved. David Geier
pointed out some examples that might be confusing as well. All those are
definitely worth addressing, but out of scope of this patch for now.
Hi! Can you please send a rebased version of this?
--
Best regards,
Kirill Reshke
On Thu, Nov 28, 2024 at 08:36:47PM GMT, Kirill Reshke wrote:
Hi! Can you please send a rebased version of this?
Sure, here it is.
Attachments:
v22-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From 2de1af6489d46449b2884a9194515cd1090d5e8c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v22 1/4] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 167 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 74 +++++++-
contrib/pg_stat_statements/sql/merging.sql | 58 ++++++
doc/src/sgml/pgstatstatements.sgml | 57 +++++-
src/backend/nodes/gen_node_support.pl | 21 ++-
src/backend/nodes/queryjumblefuncs.c | 107 ++++++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 9 +-
13 files changed, 479 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..1e58283afe
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index e659b5e2b7..12a0e757be 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 49c657b3e0..bcbf1164c1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -267,6 +267,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;
@@ -294,7 +297,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,8 +463,21 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
+ SetQueryIdConstMerge(pgss_query_id_const_merge);
+
/*
* Install hooks.
*/
@@ -2810,6 +2827,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2833,7 +2854,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2848,13 +2868,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
@@ -3012,3 +3063,12 @@ comp_location(const void *a, const void *b)
return pg_cmp_s32(l, r);
}
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+ SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..71985bb1cd
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..2276783786 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+ the only difference between queries is the length of an array with constants
+ they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +974,35 @@
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an "IN" clause)
+ contributes to the query identifier computation. Normally every element
+ of an array contributes to the query identifier, which means the same
+ query will get multiple different identifiers, one for each occurrence
+ with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <structname>pg_stat_statements</structname>. The default value is off.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 81df3bdf95..d2a276c303 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e8bf95690b..2953073872 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_const_merge = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +56,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -160,6 +165,18 @@ EnableQueryId(void)
query_id_enabled = true;
}
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+ query_id_const_merge = value;
+}
+
/*
* AppendJumble: Append a value that is substantive in a given query to
* the current jumble.
@@ -198,11 +215,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +239,67 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify if the provided list could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_const_merge)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstExpr = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging, and the
+ * order of magnitude need to be calculated.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ if (!IsA(lfirst(temp), Const))
+ return false;
+
+ *firstConst = (Const *) firstExpr;
+ *lastConst = llast_node(Const, elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +312,26 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Const *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ */
+ RecordConstLocation(jstate, first->location, true);
+ RecordConstLocation(jstate, last->location, true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +468,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 1f2d829ec5..0e2761edd5 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_const_merge;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_const_merge = query_id_const_merge;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_const_merge = param->query_id_const_merge;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a2ac7575ca..d46fc74daf 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -654,7 +654,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# AUTOVACUUM
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c1ab3d1358..4df2226cc0 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b0ef1952e8..4c9a0d1e88 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1377,7 +1377,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067..8daf0725d7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 1acf10549e64c6a52ced570d712fcba1a2f5d1ec
--
2.45.1
v22-0002-Reusable-decimalLength-functions.patchtext/plain; charset=us-asciiDownload
From c692cdae79c7d319f08578d0e00f2e6f4663f3d2 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:08 +0200
Subject: [PATCH v22 2/4] Reusable decimalLength functions
Move out decimalLength functions to reuse in the following patch.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
src/backend/utils/adt/numutils.c | 48 +----------------------
src/include/utils/numutils.h | 67 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 47 deletions(-)
create mode 100644 src/include/utils/numutils.h
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 63c2beb6a2..b536778dad 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -21,6 +21,7 @@
#include "common/int.h"
#include "port/pg_bitutils.h"
#include "utils/builtins.h"
+#include "utils/numutils.h"
/*
* A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +39,6 @@ static const char DIGIT_TABLE[200] =
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
- int t;
- static const uint32 PowersOfTen[] = {
- 1, 10, 100,
- 1000, 10000, 100000,
- 1000000, 10000000, 100000000,
- 1000000000
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
- int t;
- static const uint64 PowersOfTen[] = {
- UINT64CONST(1), UINT64CONST(10),
- UINT64CONST(100), UINT64CONST(1000),
- UINT64CONST(10000), UINT64CONST(100000),
- UINT64CONST(1000000), UINT64CONST(10000000),
- UINT64CONST(100000000), UINT64CONST(1000000000),
- UINT64CONST(10000000000), UINT64CONST(100000000000),
- UINT64CONST(1000000000000), UINT64CONST(10000000000000),
- UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
- UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
- UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
- };
-
- /*
- * Compute base-10 logarithm by dividing the base-2 logarithm by a
- * good-enough approximation of the base-2 logarithm of 10
- */
- t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
- return t + (v >= PowersOfTen[t]);
-}
-
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 0000000000..876e64f2df
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ * Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+#endif /* NUMUTILS_H */
--
2.45.1
v22-0003-Merge-constants-in-ArrayExpr-into-groups.patchtext/plain; charset=us-asciiDownload
From 800bfebec09d9876d28d3b24a3eaa256ff99787b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 12 May 2024 11:51:10 +0200
Subject: [PATCH v22 3/4] Merge constants in ArrayExpr into groups
With query_id_const_merge enabled, only the first and last elements of an
ArrayExpr are used to compute the query id. Extend this to also take the
number of elements into account, and merge constants into groups based on it.
The resulting groups are bounded by powers of 10, i.e. 1 to 9, 10 to 99, etc.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 84 +++++++++++++++----
.../pg_stat_statements/pg_stat_statements.c | 29 +++++--
contrib/pg_stat_statements/sql/merging.sql | 13 +++
doc/src/sgml/pgstatstatements.sgml | 11 +--
src/backend/nodes/gen_node_support.pl | 2 +-
src/backend/nodes/queryjumblefuncs.c | 55 ++++++++----
src/include/nodes/queryjumble.h | 10 ++-
7 files changed, 158 insertions(+), 46 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe..0cb4f67b8b 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(3 rows)
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
query | calls
------------------------------------------------------------------------+-------
SELECT * FROM test_merge WHERE id IN ($1) | 1
- SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
(4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
-(2 rows)
+ query | calls
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
-- No constants simplification
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
(0 rows)
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
- query | calls
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
-- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bcbf1164c1..24ab2a45ff 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2830,6 +2830,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
bool merged_interval = false; /* Currently processed constants
belong to a merged constants
interval. */
+ int magnitude; /* Order of magnitude for number of merged
+ constants */
/*
@@ -2844,8 +2846,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
+ *
+ * On top of that, each pair of $n symbols representing a merged constants
+ * interval will be decorated with the explanatory text, adding 14
+ * bytes.
*/
- norm_query_buflen = query_len + jstate->clocations_count * 10;
+ norm_query_buflen = query_len + jstate->clocations_count * 10 +
+ jstate->clocations_merged_count * 14;
/* Allocate result buffer */
norm_query = palloc(norm_query_buflen + 1);
@@ -2870,7 +2877,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
Assert(len_to_wrt >= 0);
/* Normal path, non merged constant */
- if (!jstate->clocations[i].merged)
+ magnitude = jstate->clocations[i].magnitude;
+ if (magnitude == 0)
{
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2887,13 +2895,23 @@ generate_normalized_query(JumbleState *jstate, const char *query,
/*
* We are not inside a merged interval yet, which means it is the
* the first merged constant.
- *
+ */
+ static const uint32 powers_of_ten[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+ int lower_merged = powers_of_ten[magnitude - 1];
+ int upper_merged = powers_of_ten[magnitude];
+
+ /*
* A merged constants interval must be represented via two
* constants with the merged flag. Currently we are at the first,
* verify there is another one.
*/
Assert(i + 1 < jstate->clocations_count);
- Assert(jstate->clocations[i + 1].merged);
+ Assert(jstate->clocations[i + 1].magnitude > 0);
memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
n_quer_loc += len_to_wrt;
@@ -2902,7 +2920,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
merged_interval = true;
/* Mark the interval in the normalized query */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+ lower_merged, upper_merged - 1);
}
/* Otherwise the constant is merged away, move forward */
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd..657044fade 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- More conditions in the query
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 2276783786..55dfa5b50f 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -634,7 +634,7 @@
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
=# SELECT query, calls FROM pg_stat_statements;
-[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
calls | 2
-[ RECORD 2 ]------------------------------
query | SELECT pg_stat_statements_reset()
@@ -992,10 +992,11 @@ calls | 1
with an array of different lenght.
If this parameter is on, an array of constants will contribute only the
- first and the last elements to the query identifier. It means two
- occurences of the same query, where the only difference is number of
- constants in the array, are going to get the same query identifier.
- Such queries are represented in form <literal>'(...)'</literal>.
+ first element, the last element and the number of elements to the query
+ identifier. It means two occurences of the same query, where the only
+ difference is number of constants in the array, are going to get the
+ same query identifier if the arrays are of similar length.
+ Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
via <structname>pg_stat_statements</structname>. The default value is off.
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index d2a276c303..c98d121dba 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1323,7 +1323,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f, false);\n"
+ print $jff "\tJUMBLE_LOCATION($f, 0);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 2953073872..8ab3261d66 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
#include "nodes/queryjumble.h"
#include "parser/scansup.h"
+#include "utils/numutils.h"
+
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
/* GUC parameters */
@@ -57,7 +59,7 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool merged);
+ int location, int magnitude);
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -129,6 +131,7 @@ JumbleQuery(Query *query)
jstate->clocations = (LocationLen *)
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
+ jstate->clocations_merged_count = 0;
jstate->highest_extern_param_id = 0;
/* Compute query ID and mark the Query node with it */
@@ -218,12 +221,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* Record location of constant within query string of query tree that is
* currently being walked.
*
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -238,10 +244,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].magnitude = magnitude;
/* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
+ if (magnitude > 0)
+ jstate->clocations_merged_count++;
}
}
@@ -249,24 +257,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* Verify if the provided list contains could be merged down, which means it
* contains only constant expressions.
*
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
*
* Note that this function searches only for explicit Const nodes and does not
* try to simplify expressions.
*/
-static bool
+static int
IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
ListCell *temp;
Node *firstExpr = NULL;
if (elements == NIL)
- return false;
+ return 0;
if (!query_id_const_merge)
{
/* Merging is disabled, process everything one by one */
- return false;
+ return 0;
}
firstExpr = linitial(elements);
@@ -280,26 +290,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
{
foreach(temp, elements)
if (!IsA(lfirst(temp), Const))
- return false;
+ return 0;
*firstConst = (Const *) firstExpr;
*lastConst = llast_node(Const, elements);
- return true;
+ return decimalLength32(elements->length);
}
/*
* If we end up here, it means no constants merging is possible, process
* the list as usual.
*/
- return false;
+ return 0;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
#define JUMBLE_ELEMENTS(list) \
_jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
- RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+ RecordConstLocation(jstate, expr->location, magnitude)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -316,15 +326,24 @@ static void
_jumbleElements(JumbleState *jstate, List *elements)
{
Const *first, *last;
- if (IsMergeableConstList(elements, &first, &last))
+ int magnitude = IsMergeableConstList(elements, &first, &last);
+
+ if (magnitude)
{
/*
* Both first and last constants have to be recorded. The first one
* will indicate the merged interval, the last one will tell us the
* length of the interval within the query text.
*/
- RecordConstLocation(jstate, first->location, true);
- RecordConstLocation(jstate, last->location, true);
+ RecordConstLocation(jstate, first->location, magnitude);
+ RecordConstLocation(jstate, last->location, magnitude);
+
+ /*
+ * After merging constants down we end up with only two constants, the
+ * first and the last one. To distinguish the order of magnitude behind
+ * merged constants, add its value into the jumble.
+ */
+ JUMBLE_FIELD_SINGLE(magnitude);
}
else
{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7..0e69e420b7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
/*
* Indicates the constant represents the beginning or the end of a merged
- * constants interval.
+ * constants interval. The value shows how many constants were merged away
+ * (up to a power of 10), or in other words the order of magnitude for
+ * number of merged constants (i.e. how many digits it has). Otherwise the
+ * value is 0, indicating that no merging was performed.
*/
- bool merged;
+ int magnitude;
} LocationLen;
/*
@@ -52,6 +55,9 @@ typedef struct JumbleState
/* Current number of valid entries in clocations array */
int clocations_count;
+ /* Current number of entries with merged constants interval */
+ int clocations_merged_count;
+
/* highest Param id we've seen, in order to start normalization correctly */
int highest_extern_param_id;
} JumbleState;
--
2.45.1
v22-0004-Introduce-query_id_const_merge_threshold.patchtext/plain; charset=us-asciiDownload
From 14d755bdcdd73916a7f4879a6b856efa9654fba5 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v22 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is at least the specified value, which can be
configured using pg_stat_statements parameter query_id_const_merge_threshold.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/launch_backend.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b..552e248ff1 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 24ab2a45ff..61b1c4ea30 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -267,8 +267,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -297,8 +297,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -463,20 +463,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3084,10 +3086,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044fade..fedeb35b8f 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 55dfa5b50f..8d107e7706 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -623,12 +623,12 @@
In some cases, queries with visibly different texts might get merged into a
single <structname>pg_stat_statements</structname> entry. Normally this
will happen only for semantically equivalent queries, or if
- <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
<screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -977,9 +977,9 @@ calls | 1
<varlistentry>
<term>
- <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
<indexterm>
- <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+ <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
</indexterm>
</term>
@@ -991,11 +991,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with more than
+ <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 8ab3261d66..88a94be933 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
@@ -172,12 +172,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -255,7 +257,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -273,12 +276,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NIL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 0e2761edd5..8e438084e5 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,7 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -745,7 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1006,7 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7..90218c6053 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.45.1
Hello
I noticed something that surprised me at first, but on further looking
it should have been obvious: setting pg_stat_statements.query_id_const_merge
affects the query ID for all readers of it, not just pg_stat_statements.
This is good because it preserves the property that pg_stat_activity
entries can be matched to pg_stat_statements entries by query_id.
Looking to commit 0001 soon.
--
Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
"On the other flipper, one wrong move and we're Fatal Exceptions"
(T.U.X.: Term Unit X - http://www.thelinuxreview.com/TUX/)
I have only looked at 0001, but I am wondering why
query_id_const_merge is a pg_stat_statements GUC
rather than a core GUC?
The dependency of pg_stat_statements to take advantage
of this useful feature does not seem right.
For example if the user does not have pg_stat_statements enabled,
but are sampling top queryId from pg_stat_activity, they will
likely want this merge behavior to build meaningful database
load graphs.
Other extensions that consume queryIds may also want this
behavior without needing to enable pg_stat_statements.
Also, we have compute_query_id as a core parameter, this
new guc will become an option for how to compute a queryId.
In the future we may want to introduce other controls for how a
queryId is generated.
Regards,
Sami
I do not have an explanation from the patch yet, but I have a test
that appears to show unexpected results. I only tested a few datatypes,
but from what I observe, some merge as expected and others do not;
i.e. int columns merge correctly but bigint do not.
"""
show pg_stat_statements.query_id_const_merge ;
create table foo (col_int int, col_smallint smallint, col_bigint
bigint, col_float float, col_text text, col_varchar varchar);
select from foo where col_int in (1, 2, 3);
select from foo where col_int in (1, 2, 3, 4);
select from foo where col_int in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
select from foo where col_smallint in (1, 2, 3);
select from foo where col_smallint in (1, 2, 3, 4);
select from foo where col_bigint in (1, 2, 3);
select from foo where col_bigint in (1, 2, 3, 4);
select from foo where col_bigint in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
select from foo where col_float in (1, 2, 3);
select from foo where col_float in (1, 2, 3, 4);
select query, queryid, calls from pg_stat_statements where query like
'select from foo where%' order by 1 desc ;
"""
postgres=# show pg_stat_statements.query_id_const_merge ;
pg_stat_statements.query_id_const_merge
-----------------------------------------
on
(1 row)
....
.......
..........
postgres=# select query, queryid, calls from pg_stat_statements where
query like 'select from foo where%' order by 1 desc ;
query
| queryid | calls
-------------------------------------------------------------------------------+----------------------+-------
select from foo where col_smallint in (...)
| -2065640271713949220 | 2
select from foo where col_int in (...)
| 2911888824129257715 | 3
select from foo where col_float in ($1, $2, $3, $4)
| -6847088148705359339 | 1
select from foo where col_float in ($1, $2, $3)
| 1631437678183488606 | 1
select from foo where col_bigint in ($1, $2, $3, $4, $5, $6, $7, $8,
$9, $10) | 3174053975478689499 | 1
select from foo where col_bigint in ($1, $2, $3, $4)
| -5236067031911646410 | 1
select from foo where col_bigint in ($1, $2, $3)
| -5529594240898645457 | 1
(7 rows)
---
Sami
On 2025-Feb-11, Sami Imseih wrote:
I do not have an explanation from the patch yet, but I have a test
that appears to show unexpected results. I only tested a few datatypes,
but from what I observe, some merge as expected and others do not;
i.e. int columns merge correctly but bigint do not.
Yep, I noticed this too, and realized that this is because these values
are wrapped in casts of some sort, while the others are not.
select from foo where col_bigint in (1, 2, 3);
select from foo where col_bigint in (1, 2, 3, 4);
select from foo where col_bigint in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
select from foo where col_float in (1, 2, 3);
select from foo where col_float in (1, 2, 3, 4);
You can see that it works correctly if you use quotes around the values,
e.g.
select from foo where col_float in ('1', '2', '3');
select from foo where col_float in ('1', '2', '3', '4');
and so on. There are no casts here because these literals are of type
unknown.
I suppose this is telling us that detecting the case with consts wrapped
in casts is not really optional. (Dmitry said this was supported at
early stages of the patch, and now I'm really curious about that
implementation because what IsMergeableConstList sees is a FuncExpr that
invokes the cast function for float8 to int4.)
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"La fuerza no está en los medios físicos
sino que reside en una voluntad indomable" (Gandhi)
On Tue, Feb 11, 2025 at 10:49:59AM GMT, Sami Imseih wrote:
I have only looked at 0001, but I am wondering why
query_id_const_merge is a pg_stat_statements GUC
rather than a core GUC?
It was moved from being a core GUC into a pg_stat_statements GUC on the request
from the reviewers. Community tries to prevent adding more and more core GUCs
into PostgreSQL.
On Tue, Feb 11, 2025 at 07:18:23PM GMT, Álvaro Herrera wrote:
On 2025-Feb-11, Sami Imseih wrote:
I do not have an explanation from the patch yet, but I have a test
that appears to show unexpected results. I only tested a few datatypes,
but from what I observe, some merge as expected and others do not;
i.e. int columns merge correctly but bigint do not.
Yep, I noticed this too, and realized that this is because these values
are wrapped in casts of some sort, while the others are not.
select from foo where col_bigint in (1, 2, 3);
select from foo where col_bigint in (1, 2, 3, 4);
select from foo where col_bigint in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
select from foo where col_float in (1, 2, 3);
select from foo where col_float in (1, 2, 3, 4);
You can see that it works correctly if you use quotes around the values,
e.g.
select from foo where col_float in ('1', '2', '3');
select from foo where col_float in ('1', '2', '3', '4');
and so on. There are no casts here because these literals are of type
unknown.
I suppose this is telling us that detecting the case with consts wrapped
in casts is not really optional. (Dmitry said this was supported at
early stages of the patch, and now I'm really curious about that
implementation because what IsMergeableConstList sees is a FuncExpr that
invokes the cast function for float8 to int4.)
Yes, those cases in question are usually FuncExpr. The original patch
implementation used to handle that via simplifying the node with
eval_const_expressions to figure out if the value we work with is a constant.
This approach was marked as too risky by reviewers, as this could reach a lot
of unexpected functionality in the mutator part.
On 2025-Feb-11, Dmitry Dolgov wrote:
On Tue, Feb 11, 2025 at 10:49:59AM GMT, Sami Imseih wrote:
I have only looked at 0001, but I am wondering why
query_id_const_merge is a pg_stat_statements GUC
rather than a core GUC?
It was moved from being a core GUC into a pg_stat_statements GUC on the request
from the reviewers. Community tries to prevent adding more and more core GUCs
into PostgreSQL.
I understand, but I tend to disagree with the argument because it's okay
to simplify things that can be simplified, but if simplifying makes them
more complex, then it goes against the original desire for simplicity
anyway. My impression is that it would be better to put it back.
(Otherwise, how do you document the behavior that pg_stat_activity
suddenly emits different query_ids than before because you installed
pg_stat_statement and enabled this feature? It just doesn't make much
sense.)
I suppose this is telling us that detecting the case with consts wrapped
in casts is not really optional. (Dmitry said this was supported at
early stages of the patch, and now I'm really curious about that
implementation because what IsMergeableConstList sees is a FuncExpr that
invokes the cast function for float8 to int4.)
Yes, those cases in question are usually FuncExpr. The original patch
implementation used to handle that via simplifying the node with
eval_const_expressions to figure out if the value we work with is a constant.
This approach was marked as too risky by reviewers, as this could reach a lot
of unexpected functionality in the mutator part.
Hmm, what about doing something much simpler, such as testing whether
there's just a CoerceViaIO/RelabelType around a Const or a one-parameter
function call of an immutable bootstrap-OID function that has a Const as
argument, and trivial cases like that? Something very very simple
that's going to catch the majority of cases without anything as complex
as a node walker.
Maybe something like statext_is_compatible_clause_internal() can be an
inspiration (and even that is far more complex than we need here.)
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"If it is not right, do not do it.
If it is not true, do not say it." (Marcus Aurelius, Meditations)
On 2025-Feb-11, Sami Imseih wrote:
I have only looked at 0001, but I am wondering why
query_id_const_merge is a pg_stat_statements GUC
rather than a core GUC?
I was wondering the same thing and found the explanation
here:
/messages/by-id/ZTmuCtymIS3n3fP_@paquier.xyz
Other extensions that consume queryIds may also want this
behavior without needing to enable pg_stat_statements.
I agree. In fact, pg_stat_activity will behave differently (using
merged query_ids) if this value is turned on, for which you need the
contrib module. This makes no sense to me.
Besides, the patch cheats in this regard: what Dmitry did was
create a function SetQueryIdConstMerge() which the extension with the
GUC can call to set the value of the variable. I really don't see that
this is better. I think we should put the GUC back where it was in v15
of the patch. (I didn't check what other changes there were
afterwards.)
About the GUC name -- query_id_const_merge -- I think this is too much a
hacker's name. How about
query_id_merge_values
query_id_merge_value_lists
query_id_squash_constant_lists
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"No es bueno caminar con un hombre muerto"
On Tue, Feb 11, 2025 at 07:51:43PM GMT, Álvaro Herrera wrote:
On 2025-Feb-11, Dmitry Dolgov wrote:
On Tue, Feb 11, 2025 at 10:49:59AM GMT, Sami Imseih wrote:
I have only looked at 0001, but I am wondering why
query_id_const_merge is a pg_stat_statements GUC
rather than a core GUC?It was moved from being a core GUC into a pg_stat_statements GUC on the request
from the reviewers. Community tries to prevent adding more and more core GUCs
into PostgreSQL.I understand, but I tend to disagree with the argument because it's okay
to simplify things that can be simplified, but if simplifying makes them
more complex, then it goes against the original desire for simplicity
anyway. My impression is that it would be better to put it back.(Otherwise, how do you document the behavior that pg_stat_activity
suddenly emits different query_ids than before because you installed
pg_stat_statement and enabled this feature? It just doesn't make much
sense.)
I don't have strong opinion on that and open to move it back, so that we
can see if anyone will object.
I suppose this is telling us that detecting the case with consts wrapped
in casts is not really optional. (Dmitry said this was supported at
early stages of the patch, and now I'm really curious about that
implementation because what IsMergeableConstList sees is a FuncExpr that
invokes the cast function for float8 to int4.)Yes, those cases in question are usually FuncExpr. The original patch
implementation used to handle that via simplifying the node with
eval_const_expressions to figure out if the value we work with is a constant.
This approach was marked as too risky by reviewers, as this could reach a lot
of unexpected functionality in the mutator part.Hmm, what about doing something much simpler, such as testing whether
there's just a CoerceViaIO/RelabelType around a Const or a one-parameter
function call of an immutable boostrap-OID function that has a Const as
argument, and trivial cases like that? Something very very simple
that's going to catch the majority of cases without anything as complex
as a node walker.Maybe something like statext_is_compatible_clause_internal() can be an
inspiration (and even that is far more complex than we need here.)
I'm somewhat hesitant to cover only some cases, but let me try, maybe
it's indeed going to be good enough.
On Tue, Feb 11, 2025 at 08:00:27PM GMT, Dmitry Dolgov wrote:
Hmm, what about doing something much simpler, such as testing whether
there's just a CoerceViaIO/RelabelType around a Const or a one-parameter
function call of an immutable boostrap-OID function that has a Const as
argument, and trivial cases like that? Something very very simple
that's going to catch the majority of cases without anything as complex
as a node walker.Maybe something like statext_is_compatible_clause_internal() can be an
inspiration (and even that is far more complex than we need here.)I'm somewhat hesitant to cover only some cases, but let me try, maybe
it's indeed going to be good enough.
I've been experimenting with this today, and while it's easy to
implement, there is one annoying thing for which I don't have a solution
yet. When generating a normalized version for such merged queries in
pgss we rely on expression location, something like:
select i from t where i in (a1, a2, a3, ..., aN);
| |
expr loc1 expr loc2
We remember loc1 and loc2, then do not copy anything between them into
the normalized query. Now, the expression location is only known up to
the parsing token, without taking into account e.g. parenthesis in more
complex expressions. Which means we don't know exactly where an
expression starts or ends, and it's hard to correctly represent queries
like:
select i from t where i in (((a1)), ((a2)), ((a3)), ..., ((aN)));
| |
expr loc1 expr loc2
The normalized version looks like this:
select i from t where i in (((...)));
While it does not affect the actual functionality and is purely
cosmetic, it's quite visible and causes questions. In theory this could
be addressed by extending fill_in_constant_lengths to chase parenthesis,
but it sounds complicated. Another option is to try a different visual
representation of merging, something that will keep the first and the
last constant intact.
On 2025-Feb-12, Dmitry Dolgov wrote:
I've been experimenting with this today, and while it's easy to
implement,
Great.
there is one annoying thing for which I don't have a solution
yet. When generating a normalized version for such merged queries in
pgss we rely on expression location, something like:select i from t where i in (a1, a2, a3, ..., aN);
| |
expr loc1 expr loc2We remember loc1 and loc2, then do not copy anything between then into
the normalized query. Now, the expression location is only known up to
the parsing token, without taking into account e.g. parenthesis in more
complex expressions. Which means we don't know exactly where an
expression starts or ends, and it's hard to correctly represent queries
like:select i from t where i in (((a1)), ((a2)), ((a3)), ..., ((aN)));
| |
expr loc1 expr loc2The normalized version looks like this:
select i from t where i in (((...)));
While it does not affect the actual functionality and is purely
cosmetic, it's quite visible and causes questions.
The nastiness level of this seems quite low, compared to what happens to
this other example if we didn't handle these easy cases:
create table t (a float);
select i from t where i in (1, 2);
select i from t where i in (1, '2');
select i from t where i in ('1', 2);
select i from t where i in ('1', '2');
select i from t where i in (1.0, 1.0);
(The point here is that the datatype differs for the constants from the
lexer down in each of these cases.)
I think it's more important to handle this better than what the posted
patch does, than improving the lexing in presence of other lexical
elements in the query. With the current patch I get _five_
pg_stat_statements entries from these queries above, where only one of
them was able to apply merging of the elements:
queryid │ query
──────────────────────┼─────────────────────────────────────
-5783267088740508246 │ select i from t where i in ($1, $2)
6446023427308995149 │ select i from t where i in ($1, $2)
3778867339896201523 │ select i from t where i in (...)
-8733218180609156532 │ select i from t where i in ($1, $2)
-5106509834475638715 │ select i from t where i in ($1, $2)
If I understand what you're saying, it's that the extra parenthesis
cause the recorded query text be a little uglier (but the queryid still
ends up being one and the same for all queries), which seems much less
of a problem. I'm okay saying that cases like that can be improved
later. (It seems to me that you want to improve the way we pass the
lexed string down to pg_stat_statements, and frankly that even seems a
different problem altogether.)
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
The nastiness level of this seems quite low, compared to what happens to
this other example if we didn't handle these easy cases:create table t (a float);
select i from t where i in (1, 2);
select i from t where i in (1, '2');
select i from t where i in ('1', 2);
select i from t where i in ('1', '2');
select i from t where i in (1.0, 1.0);(The point here is that the datatype differs for the constants from the
lexer down in each of these cases.)I think it's more important to handle this better than what the posted
patch does, than improving the lexing in presence of other lexical
elements in the query. With the current patch I get _five_
pg_stat_statements entries from these queries above, where only one of
them was able to apply merging of the elements:
I want to throw out an idea I was looking at today, if I may, and
apologies if this was brought up earlier. The approach I am looking at
is to have a custom jumbling function for ArrayExpr to handle FuncExpr
and Const. The plain Const case is quite simple, but in the case of
FuncExpr we can traverse the "args" to find the constants and simply
record their locations. I don't think this adds extra overhead, as this
work to traverse the "args" must already occur.
I see a few benefits with this approach:
1/ It deals with the different constant types in the IN-LIST
2/ It also deals with an IN-LIST that contains a subquery with an IN-LIST. Maybe
I am wrong, but the current patch does not support this case.
The negative aspect of this patch is the custom jumbling, but we have
examples of that for other expression types.
select pg_stat_statements_reset();
drop table if exists foo;
create table foo (col_int int, col_smallint smallint, col_bigint
bigint, col_float float, col_text text, col_varchar varchar);
select from foo where col_int in (1, 2, 3);
select from foo where col_int in (1, 2, 3, 4);
select from foo where col_int in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, '11');
select from foo where col_bigint in (1, 2, 3);
select from foo where col_bigint in (1, 2, 3, 4);
select from foo where col_bigint in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, '11');
select from foo where col_int in ((select 1 from foo where col_int in
(1, 2, 3, 4, 5)));
select from foo where col_int in ((select 1 from foo where col_int in
(1, 2, 3, 4, 5, 6, '7')));
select from foo where col_bigint in ((select 1 from foo where col_int
in (1, 2, 3, 4, 5)));
select from foo where col_bigint in ((select 1 from foo where col_int
in (1, 2, 3, 4, 5, 6, '7')));
select query, queryid, calls from pg_stat_statements where query like
'select from foo where%' order by stats_since asc;
query
| queryid | calls
--------------------------------------------------------------------------------------------------+----------------------+-------
select from foo where col_int in ($1, $2, $3)
| -8353422683112638164 | 3
select from foo where col_bigint in ($1, $2, $3)
| -1245684714076243746 | 3
select from foo where col_int in ((select $1 from foo where col_int
in ($2, $3, $4, $5, $6))) | 6337165648188018609 | 2
select from foo where col_bigint in ((select $1 from foo where
col_int in ($2, $3, $4, $5, $6))) | -1521638960953725050 | 2
(4 rows)
I will park the poc of the idea here. It does not deal with the presentation
as the current patch, but maybe it could be something to work from.
Regards,
Sami
Attachments:
experiment_custom_array_expr.patchapplication/octet-stream; name=experiment_custom_array_expr.patchDownload
From 6c8474745527d6d535cd28120a39ba5e2d6c27aa Mon Sep 17 00:00:00 2001
From: "Sami Imseih (AWS)"
<simseih@dev-dsk-simseih-1d-3940b79e.us-east-1.amazon.com>
Date: Wed, 12 Feb 2025 18:57:48 +0000
Subject: [PATCH 1/1] experiment_custom_array_expr
---
src/backend/nodes/queryjumblefuncs.c | 41 ++++++++++++++++++++++++++++
src/include/nodes/primnodes.h | 10 ++++---
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..e3abf59106 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -58,6 +58,7 @@ static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
+static void _jumbleArrayExpr(JumbleState *jstate, Node *node);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -377,3 +378,43 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
JUMBLE_FIELD(is_local);
JUMBLE_LOCATION(location);
}
+
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+ {
+ ListCell *l;
+ ListCell *l2;
+ Node *elements = (Node *) ((ArrayExpr *) node)->elements;
+
+ if (elements)
+ {
+ foreach(l, (List *) elements)
+ {
+ Node *expr = (Node *) lfirst(l);
+
+ if (IsA(expr, FuncExpr))
+ {
+ foreach(l2, ((FuncExpr *) expr)->args)
+ {
+ expr = (Node *) lfirst(l2);
+
+ if (IsA(expr, Const))
+ {
+ Const *c = (Const *) expr;
+
+ RecordConstLocation(jstate, c->location);
+ } else
+ _jumbleNode(jstate, expr);
+ }
+ } else if (IsA(expr, Const))
+ {
+ Const *c = (Const *) expr;
+
+ RecordConstLocation(jstate, c->location);
+ } else
+ _jumbleNode(jstate, expr);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..e1dcc93f4a 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1386,17 +1386,19 @@ typedef struct CaseTestExpr
*/
typedef struct ArrayExpr
{
+ pg_node_attr(custom_query_jumble)
+
Expr xpr;
/* type of expression result */
- Oid array_typeid pg_node_attr(query_jumble_ignore);
+ Oid array_typeid;
/* OID of collation, or InvalidOid if none */
- Oid array_collid pg_node_attr(query_jumble_ignore);
+ Oid array_collid;
/* common type of array elements */
- Oid element_typeid pg_node_attr(query_jumble_ignore);
+ Oid element_typeid;
/* the array elements or sub-arrays */
List *elements;
/* true if elements are sub-arrays */
- bool multidims pg_node_attr(query_jumble_ignore);
+ bool multidims;
/* token location, or -1 if unknown */
ParseLoc location;
} ArrayExpr;
--
2.47.1
On Wed, Feb 12, 2025 at 07:39:39PM GMT, Álvaro Herrera wrote:
The nastiness level of this seems quite low, compared to what happens to
this other example if we didn't handle these easy cases:
create table t (a float);
select i from t where i in (1, 2);
select i from t where i in (1, '2');
select i from t where i in ('1', 2);
select i from t where i in ('1', '2');
select i from t where i in (1.0, 1.0);
Yep, the current version I've got so far produces the same
pg_stat_statements entry for all of those queries. I'm going to move out
the renamed GUC and post the new patch tomorrow.
If I understand what you're saying, it's that the extra parenthesis
cause the recorded query text be a little uglier (but the queryid still
ends up being one and the same for all queries), which seems much less
of a problem.
Right, that's correct. After thinking a bit more I think this ugliness
could be addressed more easily if we take into account that all of that
is happening within a list of elements with a more or less strict format.
On Wed, Feb 12, 2025 at 08:48:03PM GMT, Dmitry Dolgov wrote:
On Wed, Feb 12, 2025 at 07:39:39PM GMT, Álvaro Herrera wrote:
The nastiness level of this seems quite low, compared to what happens to
this other example if we didn't handle these easy cases:
create table t (a float);
select i from t where i in (1, 2);
select i from t where i in (1, '2');
select i from t where i in ('1', 2);
select i from t where i in ('1', '2');
select i from t where i in (1.0, 1.0);
Yep, the current version I've got so far produces the same
pg_stat_statements entry for all of those queries. I'm going to move out
the renamed GUC and post the new patch tomorrow.
Here is how it looks (posting only the first patch, since we
concentrate on it). This version handles just a little more to cover
simple cases like the implicit conversion above. The GUC is also moved
out of pgss and renamed to query_id_merge_values. On top of that I've
added more tests showing the impact, as well as the sometimes
awkward-looking normalized query I was talking about. I'm going to
experiment with how to iron out the latter.
Attachments:
v23-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From ef4115248cd7213494e2bdf8175eaa930aa41640 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v23] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_merge_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 432 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 47 +-
contrib/pg_stat_statements/sql/merging.sql | 169 +++++++
doc/src/sgml/config.sgml | 27 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 167 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 895 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..e7815a200c
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,432 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_merge_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN (...) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ (...::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((...)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[...] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_merge_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c5..8a96aff625 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..1aa5021367 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2809,6 +2808,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2832,7 +2835,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2847,13 +2849,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means this is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..080601b149
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,169 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_merge_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_merge_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e4f201e09..b6090a43d5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8436,6 +8436,33 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-merge-values" xreflabel="query_id_merge_values">
+ <term><varname>query_id_merge_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_merge_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in the form <literal>'(...)'</literal>.
+
+ The parameter can be used to reduce the amount of repetitive data stored
+ via <xref linkend="pgstatstatements"/>. The default value is
+ <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..a776ba3019 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_merge_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_merge_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +973,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0a..c421664879 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..216c6cff95 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_merge_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,128 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify a few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ char provolatile = func_volatile(func->funcid);
+ ListCell *temp;
+
+ if (provolatile != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+ Node *firstElem = NULL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_merge_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstElem = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging.
+ */
+ if (IsMergeableConst(firstElem))
+ {
+ foreach(temp, elements)
+ {
+ Node *element = lfirst(temp);
+
+ if (!IsMergeableConst(element))
+ return false;
+ }
+
+ *firstExpr = firstElem;
+ *lastExpr = llast(elements);
+ return true;
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ return false;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +365,33 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the expression
+ * location (which is the leftmost expression), but where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but if more complex
+ * composite expressions will be supported, e.g. OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +528,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6d..3230c866d4 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_merge_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_merge_values = query_id_merge_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_merge_values = param->query_id_merge_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 226af43fe2..d9a0c6a8d4 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_merge_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_merge_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d472987ed4..6138388b77 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -650,7 +650,6 @@
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab..5656302544 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..89587d4c10 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb956658..8d1922c0ff 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_merge_values;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1
--
2.45.1
On 2025-Feb-13, Dmitry Dolgov wrote:
Here is what it looks like (posting only the first patch, since we
concentrate on it). This version handles just a little more to cover
simple cases like the implicit conversion above. The GUC is also moved
out from pgss and renamed to query_id_merge_values. On top I've added
more tests showing the impact, as well as the sometimes awkward-looking
normalized query I was talking about. I'm going to experiment how to
iron out the latter.
Thanks! It's looking better. Some small comments -- please add the new
GUC to postgresql.conf.sample. Also, how wed are you to
"query_id_merge_values" as a name? It's not in any way obvious that
this is about values in arrays. How about query_id_squash_arrays? Or
are you thinking in having values in other types of structures squashed
as well, and that this first patch does it for arrays only but you want
the GUC to also control some future feature?
(I think I prefer "squash" here as a verb to "merge").
+static bool
+IsMergeableConst(Node *element)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ char provolatile = func_volatile(func->funcid);
I think calling func_volatile potentially once per array element is not
good; this might cause dozens/thousands of identical syscache lookups.
Maybe we can pass an initially NIL list from IsMergeableConstList (as
List **), which IsMergeableConst fills with OIDs of functions that have
been checked and found acceptable. Then the second time around we
search the list first and only do func_volatile() after not finding a
match.
Another thing I didn't quite understand is why you did this rather
baroque-looking list scan:
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+ Node *firstElem = NULL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_merge_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ firstElem = linitial(elements);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for merging.
+ */
+ if (IsMergeableConst(firstElem))
+ {
+ foreach(temp, elements)
+ {
+ Node *element = lfirst(temp);
+
+ if (!IsMergeableConst(element))
+ return false;
+ }
+
+ *firstExpr = firstElem;
+ *lastExpr = llast(elements);
+ return true;
+ }
Why not just scan the list in the straightforward way, that is
foreach(temp, elements)
{
if (!IsMergeableConst(lfirst(temp)))
return false;
}
*firstExpr = linitial(elements);
*lastExpr = llast(elements);
return true;
Is there something being optimized here specifically for the first
element? I don't see it.
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
On Thu, Feb 13, 2025 at 01:47:01PM GMT, Álvaro Herrera wrote:
Also, how wed are you to
"query_id_merge_values" as a name? It's not in any way obvious that
this is about values in arrays. How about query_id_squash_arrays? Or
are you thinking in having values in other types of structures squashed
as well, and that this first patch does it for arrays only but you want
the GUC to also control some future feature?
(I think I prefer "squash" here as a verb to "merge").
Yeah, when choosing the name I was trying to keep it a bit generic. The
high level goal is to reduce repeated non-essential parts, and arrays of
constants are one clear scenario, but there could be more to it. Having
said that I don't have any particular plans for extending this logic so
far. I've ended up with query_id_squash_values, how does this sound?
I think calling func_volatile potentially once per array element is not
good; this might cause dozens/thousands of identical syscache lookups.
Maybe we can pass an initially NIL list from IsMergeableConstList (as
List **), which IsMergeableConst fills with OIDs of functions that have
been checked and found acceptable. Then the second time around we
search the list first and only do func_volatile() after not finding a
match.
Good point, added.
Another thing I didn't quite understand is why you did this rather
baroque-looking list scan:
I'm pretty sure there was some reason behind it, but when you pointed it
out that reason has promptly vanished in a puff of confusion. Fixed.
Attachments:
v24-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From 212f5534fb0f99e5daa74ea4464231faec157a58 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v24] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow enabling this behavior via the new
GUC parameter query_id_squash_values, with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 432 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 47 +-
contrib/pg_stat_statements/sql/merging.sql | 169 +++++++
doc/src/sgml/config.sgml | 27 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 165 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 894 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..881174d0ca
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,432 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN (...) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ (...::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((...)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[...] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c5..8a96aff625 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..1aa5021367 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2809,6 +2808,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2832,7 +2835,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2847,13 +2849,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..a302d3d91f
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,169 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e4f201e09..ddc8c853a2 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8436,6 +8436,33 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different lenght.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/>. The default value is
+ <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..9ce4988008 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_squash_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +973,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0a..c421664879 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..b7f13dc658 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,126 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * The argument known_immutable_funcs contains known function OIDs that were
+ * already proven to be immutable. If the expression to verify is a FuncExpr,
+ * we first check this list, and only if not found, test the function
+ * volatility and store the result back. Since most of the time constants
+ * merging will be dealing with same type of expressions, this avoids
+ * performing func_volatile over and over for the same functions.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element, List **known_immutable_funcs)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
+ func->funcid);
+ }
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /* To keep track of immutable functions in elements */
+ List *immutable_funcs = NIL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_squash_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ foreach(temp, elements)
+ {
+ if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ return false;
+ }
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +363,33 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the expression
+ * location (which is the leftmost expression), but where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but if more complex
+ * composite expressions will be supported, e.g. OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +526,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6d..052b700390 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 226af43fe2..eca30ba1a8 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d472987ed4..15939394bc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -645,12 +645,12 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab..5656302544 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..89587d4c10 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb956658..d2f1c1e310 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1
--
2.45.1
Hi,
Thanks for the updated patch!
I spent some time looking at v24 today, and I have some findings/comments.
1/
Constants passed as parameters to a prepared statement will not be
handled as expected. I did not test explicit PREPARE/EXECUTE statements,
but I assume it will have the same issue.
postgres=# show query_id_squash_values;
query_id_squash_values
------------------------
on
(1 row)
postgres=# select from foo where col_bigint in ($1, $2, $3, $4) \bind 1 2 3 4
postgres-# ;
--
(0 rows)
postgres=# select from foo where col_bigint in ($1, $2, $3, $4, $5)
\bind 1 2 3 4 5
postgres-# ;
--
(0 rows)
postgres=# select query, queryid, calls from pg_stat_statements where
query like 'select%from foo where%' order by stats_since asc;
query |
queryid | calls
----------------------------------------------------------+----------------------+-------
select from foo where col_bigint in ($1, $2, $3, $4) |
-1169585827903667511 | 1
select from foo where col_bigint in ($1, $2, $3, $4, $5) |
-5591703027615838766 | 1
(2 rows)
I think the answer here is to also check for "Param" when deciding
if an element should be merged.
i.e.
if (!IsA(element, Const) && !IsA(element, Param))
2/
This case with an array passed to a function seems to cause a regression
in pg_stat_statements query text. As you can see the text is incomplete.
CREATE OR REPLACE FUNCTION arrtest(i int[]) RETURNS void AS $$
BEGIN
NULL;
END;
$$ LANGUAGE plpgsql;
postgres=# select arrtest(array[1, 2]) from foo where col_bigint in (1, 2, 3);
arrtest
---------
(0 rows)
postgres=# select query from pg_stat_statements;
query
---------------------------
select arrtest(array[...)
(1 row)
it should otherwise look like this:
postgres=# select query from pg_stat_statements;
query
-------------------------------------------------------------------------
select arrtest(array[$1, $2]) from foo where col_bigint in ($3, $4, $5)
(1 row)
3/
A typo in the docs.
c/lenght/length
+ occurrence with an array of different lenght.
4/
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation.
Is this parameter only useful for merging the values of an IN list?
Should the documentation be more specific and say that only IN lists
will benefit from this parameter?
Also, if there is only 1 value in the list, it will have a different
queryId than
that of the same query in which more than 1 value is passed to the IN list.
Should the documentation be clear about that?
5/
pg_node_attr of query_jumble_merge is doing something
very specific to the elements list of an ArrayExpr. The
merge code likely cannot be used for other node types.
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
Why are we creating a new node attribute rather than following the
existing pattern of using the "custom_query_jumble" attribute on
ArrayExpr and creating a custom jumble function like we do with
_jumbleVariableSetStmt?
Regards,
Sami
On Thu, Feb 13, 2025 at 05:08:45PM GMT, Sami Imseih wrote:
Constants passed as parameters to a prepared statement will not be
handled as expected. I did not test explicit PREPARE/EXECUTE statements,
but I assume it will have the same issue.
This is the same question of supporting various cases. The original
patch implementation handled Param expressions as well, this part was
explicitly rejected during review. I think as a first step it's
important to find a balance between applying this optimization in as
many cases as possible, and at the same time keep the implementation
simple to give the patch a chance. So far I'm inclined to leave Param
for the future work, although of course I'm open to discussion.
This case with an array passed to a function seems to cause a regression
in pg_stat_statements query text. As you can see the text is incomplete.
I've already mentioned that in the previous email. To reiterate, it's
not a functionality regression, but an incomplete representation of a
normalized query which turned out to be hard to change. While I'm
working on that, there is a suggestion that it's not a blocker.
A typo in the docs.
c/lenght/length
One day I'll write documentation without any typo, but not today :) Thanks,
will fix with the next version.
Is this parameter only useful for merging the values of an IN list?
Should the documentation be more specific and say that only IN lists
will benefit from this parameter?
You can find one test showing that this optimization is applied to a
plain ARRAY[1, 2, 3, ...], so it's not only IN expressions.
Also, if there is only 1 value in the list, it will have a different
queryId than
that of the same query in which more than 1 value is passed to the IN list.
Should the documentation be clear about that?
I tend to think there is not much value in emphasizing that. It will add more
mental overhead to process, but this part already says "an array of constants
will contribute only the first and the last elements to the query identifier"
-- having only the first element differs from having both, hence a new entry.
pg_node_attr of query_jumble_merge is doing something
very specific to the elements list of an ArrayExpr. The
merge code likely cannot be used for other node types.
It can be, take a look at pg_node_attr commentary. Any node can have a
field marked with the query_jumble_merge attribute and benefit from merging.
Hi,
On Fri, Feb 14, 2025 at 09:36:08AM +0100, Dmitry Dolgov wrote:
On Thu, Feb 13, 2025 at 05:08:45PM GMT, Sami Imseih wrote:
This case with an array passed to a function seems to cause a regression
in pg_stat_statements query text. As you can see the text is incomplete.
I've already mentioned that in the previous email. To reiterate, it's
not a functionality regression, but an incomplete representation of a
normalized query which turned out to be hard to change. While I'm
working on that, there is a suggestion that it's not a blocker.
While talking about the normalized query text with this patch, I see that
merged values are now represented like this, per the regression tests files:
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN (...) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
This was probably ok a few years back but pg 16 introduced a new GENERIC_PLAN
option for EXPLAIN (3c05284d83b2) to be able to run EXPLAIN on a query
extracted from pg_stat_statements (among other things).
This feature would break the use case. Note that this is not a hypothetical
need: I get very frequent reports on the PoWA project about the impossibility
to get an EXPLAIN (we do have some code that tries to reinject the parameters
from stored quals but we cannot always do it) that is used with the automatic
index suggestion, and we planned to rely on EXPLAIN (GENERIC_PLAN) to have an
always working solution. I suspect that other projects also rely on this
option for similar features.
Since the merging is a yes/no option (I think there used to be some discussions
about having a threshold or some other fancy modes), maybe you could instead
differentiate the merged version by have 2 constants rather than this "..." or
something like that?
On 2025-Feb-14, Julien Rouhaud wrote:
Since the merging is a yes/no option (I think there used to be some discussions
about having a threshold or some other fancy modes), maybe you could instead
differentiate the merged version by have 2 constants rather than this "..." or
something like that?
Maybe the representation can be "($1 /*, ... */)" so that it's obvious
that the array extends beyond the first element but is still
syntactically valid.
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"In Europe they call me Niklaus Wirth; in the US they call me Nickel's worth.
That's because in Europe they call me by name, and in the US by value!"
On Fri, Feb 14, 2025 at 10:36:48AM +0100, Álvaro Herrera wrote:
On 2025-Feb-14, Julien Rouhaud wrote:
Since the merging is a yes/no option (I think there used to be some discussions
about having a threshold or some other fancy modes), maybe you could instead
differentiate the merged version by have 2 constants rather than this "..." or
something like that?
Maybe the representation can be "($1 /*, ... */)" so that it's obvious
that the array extends beyond the first element but is still
syntactically valid.
Yeah that works too and it's probably way easier to implement.
On Fri, Feb 14, 2025 at 05:57:01PM GMT, Julien Rouhaud wrote:
On Fri, Feb 14, 2025 at 10:36:48AM +0100, Álvaro Herrera wrote:
On 2025-Feb-14, Julien Rouhaud wrote:
Since the merging is a yes/no option (I think there used to be some discussions
about having a threshold or some other fancy modes), maybe you could instead
differentiate the merged version by have 2 constants rather than this "..." or
something like that?
Maybe the representation can be "($1 /*, ... */)" so that it's obvious
that the array extends beyond the first element but is still
syntactically valid.
Yeah that works too and it's probably way easier to implement.
Agree, that looks good, here is the version that uses this format.
Attachments:
v25-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From e193fa6b99102c805e048d2cb70476291cd82fc3 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v25] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_squash_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih, Julien Rouhaud
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 432 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 47 +-
contrib/pg_stat_statements/sql/merging.sql | 169 +++++++
doc/src/sgml/config.sgml | 27 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 165 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 894 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..470cdf5a7f
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,432 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 1
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 4
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) and data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ (($1 /*, ... */)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c5..8a96aff625 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..1e94f892e2 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2809,6 +2808,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2832,7 +2835,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2847,13 +2849,44 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * the first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$1 /*, ... */");
+ }
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..a302d3d91f
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,169 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e4f201e09..ae9597ad33 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8436,6 +8436,33 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'(...)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/>. The default value is
+ <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..9ce4988008 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_squash_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +973,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0a..c421664879 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..b7f13dc658 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,126 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * The argument known_immutable_funcs contains known function OIDs that were
+ * already proven to be immutable. If the expression to verify is a FuncExpr,
+ * we first check this list, and only if not found, test the function
+ * volatility and store the result back. Since most of the time constants
+ * merging will be dealing with same type of expressions, this avoids
+ * performing func_volatile over and over for the same functions.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element, List **known_immutable_funcs)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
+ func->funcid);
+ }
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /* To keep track of immutable functions in elements */
+ List *immutable_funcs = NIL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_squash_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ foreach(temp, elements)
+ {
+ if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ return false;
+ }
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +363,33 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the expression
+ * location (which is the leftmost expression), but where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but if more complex
+ * composite expressions will be supported, e.g. OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +526,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6d..052b700390 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 226af43fe2..eca30ba1a8 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d472987ed4..15939394bc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -645,12 +645,12 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab..5656302544 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..89587d4c10 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb956658..d2f1c1e310 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1
--
2.45.1
Since the merging is a yes/no option (I think there used to be some discussions
about having a threshold or some other fancy modes), maybe you could instead
differentiate the merged version by having 2 constants rather than this "..." or
something like that?
Maybe the representation can be "($1 /*, ... */)" so that it's obvious
that the array extends beyond the first element but is still
syntactically valid.
Yeah that works too and it's probably way easier to implement.
+1
Just to throw out an alternate idea using comments. What about
adding a comment at the start to the query "/* query_id_squash_values */"
and keep the parameter symbols as-is. The comment at the start will
indicate that this feature was used.
On Thu, Feb 13, 2025 at 05:08:45PM GMT, Sami Imseih wrote:
Constants passed as parameters to a prepared statement will not be
handled as expected. I did not test explicit PREPARE/EXECUTE statements,
but I assume it will have the same issue.
This is the same question of supporting various cases. The original
patch implementation handled Param expressions as well, this part was
explicitly rejected during review. I think as a first step it's
important to find a balance between applying this optimization in as
many cases as possible, and at the same time keep the implementation
simple to give the patch a chance. So far I'm inclined to leave Param
for the future work, although of course I'm open to discussion.
I do see the discussion here [1]/messages/by-id/20230211104707.grsicemegr7d3mgh@erthalion.local, sorry for not noticing it.
I am not sure about this though. At minimum this needs to be documented.
However, I think the prepared statement case is too common of a case to
skip for the first release of this feature, and users that will likely
benefit from this feature are using prepared statements (e.g. JDBC, etc.).
But, others may disagree.
pg_node_attr of query_jumble_merge is doing something
very specific to the elements list of an ArrayExpr. The
merge code likely cannot be used for other node types.
It can be, take a look at pg_node_attr commentary. Any node can have a
field marked with query_jumble_merge attribute and benefit from merging.
I can't think of other cases beyond ArrayExpr where this will be needed.
The node that could use this will need to carry constants, but ArrayExpr
is the only case I can think of in which this will be useful for jumbling.
There should be a really good reason IMO to do something other than the
existing pattern of using custom_query_jumble.
I scanned through the thread and could not find a discussion on this,
but maybe others have an opinion.
This case with an array passed to a function seems to cause a regression
in pg_stat_statements query text. As you can see the text is incomplete.
I've already mentioned that in the previous email. To reiterate, it's
not a functionality regression, but an incomplete representation of a
normalized query which turned out to be hard to change. While I'm
working on that, there is a suggestion that it's not a blocker.
It's not a functionality regression as far as query execution
or pg_stat_statements counters go, but it is a regression as far as
displaying query text in pg_stat_statements. pg_stat_statements, unlike
pg_stat_activity, makes a guarantee not to trim text as stated in the docs [2] https://www.postgresql.org/docs/current/pgstatstatements.html
"The representative query texts are kept in an external disk file,
and do not consume shared memory. Therefore,
even very lengthy query texts can be stored successfully."
I don't think any feature that trims text in pg_stat_statements is acceptable,
IMO. Others may disagree.
Regards,
Sami
[1]: /messages/by-id/20230211104707.grsicemegr7d3mgh@erthalion.local
[2]: https://www.postgresql.org/docs/current/pgstatstatements.html
On Fri, Feb 14, 2025 at 05:26:19AM GMT, Sami Imseih wrote:
On Thu, Feb 13, 2025 at 05:08:45PM GMT, Sami Imseih wrote:
Constants passed as parameters to a prepared statement will not be
handled as expected. I did not test explicit PREPARE/EXECUTE statements,
but I assume it will have the same issue.
This is the same question of supporting various cases. The original
patch implementation handled Param expressions as well, this part was
explicitly rejected during review. I think as a first step it's
important to find a balance between applying this optimization in as
many cases as possible, and at the same time keep the implementation
simple to give the patch a chance. So far I'm inclined to leave Param
for the future work, although of course I'm open to discussion.
I do see the discussion here [1], sorry for not noticing it.
I am not sure about this though. At minimum this needs to be documented.
However, I think the prepared statement case is too common of a case to
skip for the first release of this feature, and users that will likely
benefit from this feature are using prepared statements (e.g. JDBC, etc.).
Right, prepared statements are quite a common case. This would be the
first thing I'll take on if this patch finds its way into the release.
As you can see, it's not at all obvious that that will happen; I estimate
the chances for that to be higher if moving in smaller
steps.
pg_node_attr of query_jumble_merge is doing something
very specific to the elements list of an ArrayExpr. The
merge code likely cannot be used for other node types.
It can be, take a look at pg_node_attr commentary. Any node can have a
field marked with query_jumble_merge attribute and benefit from merging.
I can't think of other cases beyond ArrayExpr where this will be needed.
The node that could use this will need to carry constants, but ArrayExpr
is the only case I can think of in which this will be useful for jumbling.
There should be a really good reason IMO to do something other than the
existing pattern of using custom_query_jumble.
Well, there are plenty of expression nodes that have lists in them, maybe
more will be added in the future. And as before, the idea of using
pg_node_attr was a reasonable suggestion from Michael Paquier on top of
the original design (which indeed used a custom jumble function for
ArrayExpr).
It's not a functionality regression as far as query execution
or pg_stat_statements counters go, but it is a regression as far as
displaying query text in pg_stat_statements. pg_stat_statements, unlike
pg_stat_activity, makes a guarantee not to trim text as stated in the docs [2]
"The representative query texts are kept in an external disk file,
and do not consume shared memory. Therefore,
even very lengthy query texts can be stored successfully."
Just to clarify, the part you reference doesn't say anything about
trimming, does it? In fact, the query text stored in
pg_stat_statements might as well be somewhat different from the one that was
executed, due to similar queries having the same query_id and differing
only in e.g. parentheses.
But in any case, you're right that the original thing was a bug. I
didn't realize you're talking about a missing chunk of the normalized
query. The issue could be triggered when having multiple merged
intervals within the same query.
Btw, there was another mistake in the last version introducing the
"$1 /*, ... */" format: the constant position of course has to be
calculated as usual.
Attachments:
v26-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From 0b65b35500906460c061007f7cbb372eeaf9c4ab Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v26] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_squash_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih, Julien Rouhaud
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 465 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 56 ++-
contrib/pg_stat_statements/sql/merging.sql | 180 +++++++
doc/src/sgml/config.sgml | 28 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 165 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 948 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..144861f243
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,465 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 1
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 4
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) AND data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */)+| 3
+ AND data IN ($3 /*, ... */) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ (($1 /*, ... */)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c5..8a96aff625 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..20f5612688 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2809,6 +2808,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2832,7 +2835,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2847,13 +2849,53 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
+ i + 1 + jstate->highest_extern_param_id);
+ }
+ else
+ {
+ /*
+ * If it's a merged constant during a merged_interval, it has to
+ * close it.
+ */
+ merged_interval = false;
+ }
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..282466f9b9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,180 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e4f201e09..4ef10e9cbd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8436,6 +8436,34 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in the form <literal>'($1 /*, ... */)'</literal>.
+
+ The parameter can be used to reduce the amount of repeated data stored
+ via <xref linkend="pgstatstatements"/>. Only constants are affected;
+ bind parameters cannot benefit from this functionality. The default
+ value is <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..0ef9785854 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_squash_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN ($1 /*, ... */)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ There is also a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +973,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0a..c421664879 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..b7f13dc658 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,126 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify a few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * The argument known_immutable_funcs contains known function OIDs that were
+ * already proven to be immutable. If the expression to verify is a FuncExpr,
+ * we first check this list, and only if not found, test the function
+ * volatility and store the result back. Since most of the time constants
+ * merging will be dealing with same type of expressions, this avoids
+ * performing func_volatile over and over for the same functions.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element, List **known_immutable_funcs)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
+ func->funcid);
+ }
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that each element is checked via IsMergeableConst, which only unwraps a
+ * few simple wrappers and does not try to simplify expressions in general.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /* To keep track of immutable functions in elements */
+ List *immutable_funcs = NIL;
+
+ if (elements == NIL)
+ return false;
+
+ if (!query_id_squash_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ foreach(temp, elements)
+ {
+ if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ return false;
+ }
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +363,33 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the expression
+ * location (which is the leftmost expression), but where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but if more complex
+ * composite expressions will be supported, e.g. OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +526,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6d..052b700390 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 226af43fe2..eca30ba1a8 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d472987ed4..15939394bc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -645,12 +645,12 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab..5656302544 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow merging of the field's values for query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..89587d4c10 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb956658..d2f1c1e310 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1
--
2.45.1
On Fri, Feb 14, 2025 at 03:20:24PM +0100, Dmitry Dolgov wrote:
Btw, there was another mistake in the last version introducing
"$1 /*, ... */" format, the constant position has to be of course
calculated as usual.
I'm not sure what you mean here, but just in case:
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) AND data = $3 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
There seems to be an off-by-1 error in parameter numbering when merging them.
Note that the query text as-is can still be successfully used in an EXPLAIN
(GENERIC_PLAN), but it might cause problems for third-party tools that try to do
something smarter about the parameters.
On Fri, Feb 14, 2025 at 10:39:45PM GMT, Julien Rouhaud wrote:
There seems to be an off-by-1 error in parameter numbering when merging them.
There are indeed three constants, but the second is not visible in the
query text. Maybe makes sense to adjust the number in this case, let me
try.
Note that the query text as-is can still be successfully used in an EXPLAIN
(GENERIC_PLAN), but it might cause problems for third-party tools that try to do
something smarter about the parameters.
Since the normalized query will be a valid one now, I hope that such
cases will be rare. On top of that, there will always be the option to not
enable constant squashing and avoid any trouble. Or do you have some
particular scenario in mind that might be problematic?
I do see the discussion here [1], sorry for not noticing it.
I am not sure about this though. At minimum this needs to be documented.
However, I think the prepared statement case is too common a case to
skip for the first release of this feature, and users that will likely
benefit from this feature are using prepared statements (e.g. JDBC, etc.).
Right, prepared statements are quite a common case. This would be the
first thing I'll take on if this patch finds its way
into the release. As you can see, it's not at all obvious that that will
happen; I estimate the chances of that to be higher if we move in smaller
steps.
I think it will be sad to not include this very common case from
the start, because it is going to be one of the most common
cases.
Wouldn't doing something like this inside IsMergeableConst
"""
if (!IsA(arg, Const) && !IsA(arg, Param))
"""
instead of
"""
if (!IsA(arg, Const))
"""
be sufficient?
I can't think of other cases beyond ArrayExpr where this will be needed.
The node that could use this will need to carry constants, but ArrayExpr
is the only case I can think of in which this will be useful for jumbling.
There should be a really good reason IMO to do something other than the
existing pattern of using custom_query_jumble.
Well, there are plenty of expression nodes that have lists in them, maybe
more will be added in the future. And as before, the idea of using
pg_node_attr was a reasonable suggestion from Michael Paquier on top of
the original design (which indeed used a custom jumble function for
ArrayExpr).
OK. I don't necessarily agree with this, but it's been discussed [1] and
I will not push this point any further.
It's not a functionality regression as far as query execution
or pg_stat_statements counters go, but it is a regression as far as
displaying query text in pg_stat_statements. pg_stat_statements, unlike
pg_stat_activity, makes a guarantee not to trim text, as stated in the docs [2]:
"The representative query texts are kept in an external disk file,
and do not consume shared memory. Therefore,
even very lengthy query texts can be stored successfully."
Just to clarify, the part you reference doesn't say anything about
trimming, does it? In fact, the query text stored in
pg_stat_statements might as well be somewhat different from the one that was
executed, due to similar queries having the same query_id and differing
only in e.g. parentheses.
I perhaps meant "missing chunk" instead of "trimming". To me it just
looked like trimmed text, which was wrong. Looks like v25
deals with that better at least. I am just not sure about all that we are doing
here, as I believe it may open up big chances for bugs in generating the normalized
query texts. I'm a bit worried about that. IMO, we are better off just
adding a comment such as "/* query_id_squash_values */" at the start of
the query text and keeping all the parameter symbols in place.
[1]: /messages/by-id/ZTmuCtymIS3n3fP_@paquier.xyz
--
Sami
On Fri, Feb 14, 2025 at 03:56:32PM +0100, Dmitry Dolgov wrote:
On Fri, Feb 14, 2025 at 10:39:45PM GMT, Julien Rouhaud wrote:
There seems to be an off-by-1 error in parameter numbering when merging them.
There are indeed three constants, but the second is not visible in the
query text. Maybe makes sense to adjust the number in this case, let me
try.
Thanks!
Note that the query text as-is can still be successfully used in an EXPLAIN
(GENERIC_PLAN), but it might cause problems for third-party tools that try to do
something smarter about the parameters.
Since the normalized query will be a valid one now, I hope that such
cases will be rare. On top of that, there will always be the option to not
enable constant squashing and avoid any trouble.
It might not always be an option. I have seen applications that create
thousands of duplicated queryids just because they have a non-deterministic
number of parameters they put in such IN () clauses. If that leads to a total
number of unique (dbid, userid, queryid, toplevel) too big for a reasonable
pg_stat_statements.max, then the only choice might be to enable the new merging
parameter or to deactivate pg_stat_statements.
Or do you have some
particular scenario of what might be problematic?
I don't have a very specific scenario. It's mostly for things like trying to
"un-jumble" a query, you may need to loop through the parameters and a missing
number could be problematic. But since the overall number of parameters might
change from execution to execution that's probably the least of the problems to
deal with with this merging feature.
On Fri, Feb 14, 2025 at 11:12:25PM GMT, Julien Rouhaud wrote:
There seems to be an off-by-1 error in parameter numbering when merging them.
There are indeed three constants, but the second is not visible in the
query text. Maybe makes sense to adjust the number in this case, let me
try.
This should do it. The last patch for today, otherwise I'll probably add
more bugs than features :)
Attachments:
v27-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/plain; charset=us-asciiDownload
From 122cac8eda36af877ac471322f681aa6ff46d61b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v27] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_squash_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih, Julien Rouhaud
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 465 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 63 ++-
contrib/pg_stat_statements/sql/merging.sql | 180 +++++++
doc/src/sgml/config.sgml | 28 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 166 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 956 insertions(+), 26 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..ecf0a66a6b
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,465 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 1
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 4
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) AND data = $2 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */)+| 3
+ AND data IN ($2 /*, ... */) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ (($1 /*, ... */)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c5..8a96aff625 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..dcc65fc1a6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2809,6 +2808,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ belong to a merged constants
+ interval. */
+ int skipped_constants = 0; /* To adjust positions of visible
+ constants in the presense of a merged
+ constanst interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2832,7 +2838,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2847,13 +2852,57 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id -
+ skipped_constants);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * the first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
+ i + 1 + jstate->highest_extern_param_id -
+ skipped_constants);
+
+ skipped_constants++;
+ }
+ else
+ {
+ /*
+ * If it's a merged constant during a merged_interval, it has to
+ * close it.
+ */
+ merged_interval = false;
+ }
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..282466f9b9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,180 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5e4f201e09..4ef10e9cbd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8436,6 +8436,34 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is the number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in the form <literal>'($1 /*, ... */)'</literal>.
+
+ The parameter can be used to reduce the amount of repetitive data stored
+ via <xref linkend="pgstatstatements"/>. Only constants are affected;
+ bind parameters cannot benefit from this functionality. The default
+ value is <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..0ef9785854 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_squash_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN ($1 /*, ... */)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -956,6 +973,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0a..c421664879 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a28193..c3c5edff32 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,127 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * The argument known_immutable_funcs contains known function OIDs that were
+ * already proven to be immutable. If the expression to verify is a FuncExpr,
+ * we first check this list, and only if not found, test the function
+ * volatility and store the result back. Since most of the time constants
+ * merging will be dealing with same type of expressions, this avoids
+ * performing func_volatile over and over for the same functions.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element, List **known_immutable_funcs)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if(IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
+ func->funcid);
+ }
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /* To keep track of immutable functions in elements */
+ List *immutable_funcs = NIL;
+
+ /* A mergeable list needs to contain at least two elements */
+ if (elements == NIL || list_length(elements) < 2)
+ return false;
+
+ if (!query_id_squash_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ foreach(temp, elements)
+ {
+ if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ return false;
+ }
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +364,33 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the expression
+ * location (which is the leftmost expression), but where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but if more complex
+ * composite expressions will be supported, e.g. OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +527,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index a97a1eda6d..052b700390 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 226af43fe2..eca30ba1a8 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d472987ed4..15939394bc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -645,12 +645,12 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab..5656302544 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 839e71d52f..89587d4c10 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb956658..d2f1c1e310 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1
--
2.45.1
On Fri, Feb 14, 2025 at 09:06:24AM GMT, Sami Imseih wrote:
I think it will be sad to not include this very common case from
the start, because it is going to be one of the most common
cases.
Wouldn't doing something like this inside IsMergeableConst
"""
if (!IsA(arg, Const) && !IsA(arg, Param))
"""
instead of
"""
if (!IsA(arg, Const))
"""
be sufficient?
That's exactly what the original rejected implementation was doing. I
guess to answer this question fully I need to do some more detailed
investigation, I'm probably not aware about everything at this point.
I perhaps meant "missing chunk" instead of "trimming". To me it just
looked like a trimmed text, which was wrong. Looks like v25
deals with that better at least. I am just not sure about all that we are doing
here as I believe it may open up big chances for bugs generating the normalized
query texts. I'm a bit worried about that. IMO, we are better off just
adding a comment
at the start of the query text, such as "/*
query_id_squash_values */",
and keeping all the parameter symbols in-place.
I see what you mean, but keeping everything in place is partially
defeating the purpose of the patch. The idea is not only to make those
queries have the same query_id, but also to reduce the size of
the queries themselves. E.g. the use case that triggered the
patch was about queries having tens of thousands of such constants,
so that their size was a burden on its own.
I perhaps meant "missing chunk" instead of "trimming". To me it just
looked like a trimmed text, which was wrong. Looks like v25
deals with that better at least. I am just not sure about all that we are doing
here as I believe it may open up big chances for bugs generating the normalized
query texts. I'm a bit worried about that. IMO, we are better off just
adding a comment
at the start of the query text, such as "/*
query_id_squash_values */",
and keeping all the parameter symbols in-place.
I see what you mean, but keeping everything in place is partially
defeating purpose of the patch. The idea is not only to make those
queries to have the same query_id, but also to reduce the size of
queries themselves. E.g. the use case scenario that has triggered the
patch was about queries having dozens of thousands of such constants,
so that the size of them was a burden on its own.
My experience with this issue is not so much the size of the query text,
but it's the fact that similar queries ( with varying length IN-lists ) being
tracked in different entries, causing high deallocation and heavy
garbage collection. This is besides the overall loss of quality of
the data from pg_stat_statements if there is constant deallocation.
But, with what you are doing with this patch, we will now have
a single tracking entry for similar queries with varying IN-lists and
even if the query text is *large*, it's only a single entry tracking
and we are no longer continuously deallocating and garbage
collecting as frequently.
--
Sami
I perhaps meant "missing chunk" instead of "trimming". To me it just
looked like a trimmed text, which was wrong. Looks like v25
deals with that better at least. I am just not sure about all that we are doing
here as I believe it may open up big chances for bugs generating the normalized
query texts. I'm a bit worried about that. IMO, we are better off just
adding a comment
at the start of the query text, such as "/*
query_id_squash_values */",
and keeping all the parameter symbols in-place.
I see what you mean, but keeping everything in place is partially
defeating purpose of the patch. The idea is not only to make those
queries to have the same query_id, but also to reduce the size of
queries themselves. E.g. the use case scenario that has triggered the
patch was about queries having dozens of thousands of such constants,
so that the size of them was a burden on its own.
My experience with this issue is not so much the size of the query text,
but it's the fact that similar queries ( with varying length IN-lists ) being
tracked in different entries, causing high deallocation and heavy
garbage collection. This is besides the overall loss of quality of
the data from pg_stat_statements if there is constant deallocation.
But, with what you are doing with this patch, we will now have
a single tracking entry for similar queries with varying IN-lists and
even if the query text is *large*, it's only a single entry tracking
and we are no longer continuously deallocating and garbage
collecting as frequently.
Another point, I think if we want to control the size of the query texts,
that could be something that is maybe useful overall for pg_stat_statements,
not just for IN-list type queries.
--
Sami
Wouldn't doing something like this inside IsMergeableConst
"""
if (!IsA(arg, Const) && !IsA(arg, Param))
"""
instead of
"""
if (!IsA(arg, Const))
"""
be sufficient?
That's exactly what the original rejected implementation was doing. I
guess to answer this question fully I need to do some more detailed
investigation, I'm probably not aware about everything at this point.
I am not sure which rejected implementation you are referring to
as this is a long thread :). But I will just add my findings ( as I
really wanted to try this out )
on top of your latest v27 here. Maybe this is all we need. Essentially
check for a PARAM_EXTERN
as we are scanning through the elements and only consider those types of args,
and the constants of course.
"""
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -295,6 +295,14 @@ IsMergeableConst(Node *element, List
**known_immutable_funcs)
{
Node *arg = lfirst(temp);
+ if (IsA(arg, Param))
+ {
+ Param *p = (Param *) arg;
+
+ if (p->paramkind == PARAM_EXTERN)
+ return true;
+ }
+
if (!IsA(arg, Const))
return false;
}
@@ -302,6 +310,14 @@ IsMergeableConst(Node *element, List
**known_immutable_funcs)
return true;
}
+ if (IsA(element, Param))
+ {
+ Param *p = (Param *) element;
+
+ if (p->paramkind == PARAM_EXTERN)
+ return true;
+ }
+
if (!IsA(element, Const))
return false;
"""
"""
set query_id_squash_values = on;
select pg_stat_statements_reset();
select where 1 in (1, 2, 3);
select where 1 in (1, 2, 3, 4);
prepare prep(int, int, int) as select where 1 in ($1, $2, $3);
execute prep(1, 2, 3);
deallocate prep;
prepare prep(int, int, int, int) as select where 1 in ($1, $2, $3, $4);
execute prep(1, 2, 3, 4);
deallocate prep;
-- mixed constants and parameters
prepare prep(int, int, int) as select where 1 in ($1, $2, $3, 4);
execute prep(1, 2, 3);
deallocate prep;
prepare prep(int, int, int, int) as select where 1 in ($1, $2, $3, 4, $4);
execute prep(1, 2, 3, 5);
deallocate prep;
select where 1 in ($1, $2, $3) \bind 1 2 3
;
select where 1 in ($1, $2, $3, $4) \bind 1 2 3 4
;
-- mixed constants and parameters
select where 1 in ($1, $2, $3, 4) \bind 1 2 3
;
select where 1 in ($1, $2, $3, 4, $4) \bind 1 2 3 5
;
select query, queryid, calls from pg_stat_statements;
postgres=# select query, queryid, calls from pg_stat_statements;
query | queryid | calls
------------------------------------+----------------------+-------
select pg_stat_statements_reset() | 522241623491678666 | 1
deallocate $1 | -3638851837470664936 | 4
select where $1 in ($2 /*, ... */) | -7657972370536959080 | 10
(3 rows)
"""
---
Sami
This should do it. The last patch for today,
I looked at v27 today and have a few comments.
1/ It looks like the CTE test is missing a check for results.
"""
-- Test constants evaluation in a CTE, which was causing issues in the past
WITH cte AS (
SELECT 'const' as const FROM test_merge
)
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-- Simple array would be merged as well
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
"""
2/ Looking at IsMergeableConst, I am not sure why we care about
things like function volatility, implicit cast or funcid > FirstGenbkiObjectId?
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs =
lappend_oid(*known_immutable_funcs,
+
func->funcid);
+ }
Shouldn't we just be looking for Constants (or PARAM_EXTERN) and if so
record the location, and for all other conditions, simply call _jumbleNode? Why
wouldn't that be enough?
3/ Here, this looks wrong as we could end up traversing an elements list
twice. Once inside IsMergeableConstList and if that call returns false, we end
up traversing through the elements list again in _jumbleNode.
+ Node *first, *last;
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded.
The first one
+ * will indicate the merged interval, the last one
will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need
not the expression
+ * location (which is the leftmost expression), but
where it ends. For
+ * the limited set of supported cases now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation, but
if more complex
+ * composite expressions will be supported, e.g.
OpExpr or FuncExpr as
+ * an explicit call, the rightmost expression will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
Regards,
Sami
On Mon, Feb 17, 2025 at 09:51:32AM GMT, Sami Imseih wrote:
This should do it. The last patch for today,
I looked at v27 today and have a few comments.
1/ It looks like the CTE test is missing a check for results.
This test was to catch a crash that was happening in older version of
the patch, so it doesn't have to verify the actual pgss entry.
2/ Looking at IsMergeableConst, I am not sure why we care about
things like function volatility, implicit cast or funcid > FirstGenbkiObjectId?
Function volatility is important to establish how constant the result
is; for now we would like to exclude non-immutable functions. The
implicit cast and builtin check are there to limit squashing and exclude
explicit or user-created functions (the second is probably an overkill,
but this could be gradually relaxed later). Or are you not sure about
something different?
3/ Here, this looks wrong as we could end up traversing an elements list
twice. Once inside IsMergeableConstList and if that call returns false, we end
up traversing through the elements list again in _jumbleNode.
IsMergeableConstList and _jumbleNode serve different purposes, so it's
probably unwise to try to replace one with another. E.g.
IsMergeableConstList will stop at the first non-constant expression, so
it's not a full traversal.
This test was to catch a crash that was happening in older version of
the patch, so it doesn't have to verify the actual pgss entry.
It seems odd to keep this test because of crash behavior experienced
in a previous version of the patch. If the crash reason was understood
and resolved, why keep it?
2/ Looking at IsMergeableConst, I am not sure why we care about
things like function volatility, implicit cast or funcid > FirstGenbkiObjectId?
Function volatility is important to establish how constant the result
is; for now we would like to exclude non-immutable functions. The
implicit cast and builtin check are there to limit squashing and exclude
explicit or user-created functions (the second is probably an overkill,
but this could be gradually relaxed later). Or are you not sure about
something different?
My thoughts are when dealing with FuncExpr, if the first arg in the list of
func->args is a Const, shouldn't that be enough to tell us that we have
a mergeable value. If it's not a Const, it may be another FuncExpr, so
that tells us we don't have a mergeable list. Why would this not be enough?
See the attached 0001-experiement-on-top-of-v27.patch
which applies on top of v27 and produces the results like below.
postgres=# explain verbose select from test_merge where id in (1, 2,
3, 4, 5, 6::bigint);
QUERY PLAN
-------------------------------------------------------------------
Seq Scan on public.test_merge (cost=0.00..49.55 rows=68 width=0)
Filter: (test_merge.id = ANY ('{1,2,3,4,5,6}'::bigint[]))
Query Identifier: 9190277587190463639
(3 rows)
postgres=# explain verbose select from test_merge where id in (1, 2,
3, 4, 5, 6::bigint, 7);
QUERY PLAN
-------------------------------------------------------------------
Seq Scan on public.test_merge (cost=0.00..52.38 rows=79 width=0)
Filter: (test_merge.id = ANY ('{1,2,3,4,5,6,7}'::bigint[]))
Query Identifier: 9190277587190463639
(3 rows)
postgres=# explain verbose select from test_merge where id in (1, 2,
3, 4, 5, 6::bigint, 7, testf5(1));
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on public.test_merge (cost=0.00..625.85 rows=90 width=0)
Filter: (test_merge.id = ANY (ARRAY['1'::bigint, '2'::bigint,
'3'::bigint, '4'::bigint, '5'::bigint, '6'::bigint, '7'::bigint,
(testf5(1))::bigint]))
Query Identifier: 4874022288496461916
(3 rows)
--
Sami
Attachments:
0001-experiement-on-top-of-v27.patchapplication/octet-stream; name=0001-experiement-on-top-of-v27.patchDownload
From 405911ce6e0a2e1de1b138e2ec5f24adaf52272d Mon Sep 17 00:00:00 2001
From: "Sami Imseih (AWS)"
<simseih@dev-dsk-simseih-1d-3940b79e.us-east-1.amazon.com>
Date: Mon, 17 Feb 2025 19:47:26 +0000
Subject: [PATCH 1/1] experiement on top of v27
---
src/backend/nodes/queryjumblefuncs.c | 40 +++++++++-------------------
1 file changed, 12 insertions(+), 28 deletions(-)
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index c3c5edff32..0c03bf0dbf 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -262,7 +262,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
* test them for being Const expression for simplicity.
*/
static bool
-IsMergeableConst(Node *element, List **known_immutable_funcs)
+IsMergeableConst(Node *element)
{
if (IsA(element, RelabelType))
element = (Node *) ((RelabelType *) element)->arg;
@@ -275,37 +275,24 @@ IsMergeableConst(Node *element, List **known_immutable_funcs)
FuncExpr *func = (FuncExpr *) element;
ListCell *temp;
- if (func->funcid > FirstGenbkiObjectId)
- return false;
-
- if (func->funcformat != COERCE_IMPLICIT_CAST)
- return false;
-
- if (!list_member_oid(*known_immutable_funcs, func->funcid))
- {
- /* Not found in the cache, verify and add if needed */
- if(func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
- return false;
-
- *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
- func->funcid);
- }
-
foreach(temp, func->args)
{
Node *arg = lfirst(temp);
- if (!IsA(arg, Const))
- return false;
+ if (IsA(arg, Const))
+ return true;
}
-
+ } else if (IsA(element, Const))
return true;
- }
+ else if (IsA(element, Param))
+ {
+ Param *p = (Param *) element;
- if (!IsA(element, Const))
- return false;
+ if (p->paramkind == PARAM_EXTERN)
+ return true;
+ }
- return true;
+ return false;
}
/*
@@ -322,9 +309,6 @@ IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
{
ListCell *temp;
- /* To keep track of immutable functions in elements */
- List *immutable_funcs = NIL;
-
/* A mergeable list needs to contain at least two elements */
if (elements == NIL || list_length(elements) < 2)
return false;
@@ -337,7 +321,7 @@ IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
foreach(temp, elements)
{
- if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ if (!IsMergeableConst(lfirst(temp)))
return false;
}
*firstExpr = linitial(elements);
--
2.47.1
On Mon, Feb 17, 2025 at 01:50:00PM GMT, Sami Imseih wrote:
This test was to catch a crash that was happening in older version of
the patch, so it doesn't have to verify the actual pgss entry.
It seems odd to keep this test because of crash behavior experienced
in a previous version of the patch. If the crash reason was understood
and resolved, why keep it?
A preventive measure. As you could notice, the patch has long history,
and certain issues could be accidentally reintroduced.
2/ Looking at IsMergeableConst, I am not sure why we care about
things like function volatility, implicit cast or funcid > FirstGenbkiObjectId?
Function volatility is important to establish how constant the result
is; for now we would like to exclude non-immutable functions. The
implicit cast and builtin check are there to limit squashing and exclude
explicit or user-created functions (the second is probably an overkill,
but this could be gradually relaxed later). Or are you not sure about
something different?
My thoughts are when dealing with FuncExpr, if the first arg in the list of
func->args is a Const, shouldn't that be enough to tell us that we have
a mergeable value. If it's not a Const, it may be another FuncExpr, so
that tells us we don't have a mergeable list. Why would this not be enough?
It's not a question about whether it's possible to implement this, but
about whether it makes sense. In case of plain constants it's
straightforward -- they will not change anything meaningfully and hence
could be squashed from the query. Now for a function, that might return
different values for the same set of constant arguments, it's much less
obvious and omitting such expressions might have unexpected consequences.
See the attached 0001-experiement-on-top-of-v27.patch
which applies on top of v27 and produces the results like below.
Btw, if you would like to share a code delta, please do not post it as a
patch or diff. This hijacks the CI pipeline, because CFbot thinks that's
a new version of the original patch.
This test was to catch a crash that was happening in older version of
the patch, so it doesn't have to verify the actual pgss entry.
It seems odd to keep this test because of crash behavior experienced
in a previous version of the patch. If the crash reason was understood
and resolved, why keep it?
A preventive measure. As you could notice, the patch has long history,
and certain issues could be accidentally reintroduced.
This test on its own is valuable to test that we don't merge Var's, so if we
keep it we should at least have such a test with verification.
2/ Looking at IsMergeableConst, I am not sure why we care about
things like function volatility, implicit cast or funcid > FirstGenbkiObjectId?
Function volatility is important to establish how constant the result
is; for now we would like to exclude non-immutable functions. The
implicit cast and builtin check are there to limit squashing and exclude
explicit or user-created functions (the second is probably an overkill,
but this could be gradually relaxed later). Or are you not sure about
something different?
My thoughts are when dealing with FuncExpr, if the first arg in the list of
func->args is a Const, shouldn't that be enough to tell us that we have
a mergeable value. If it's not a Const, it may be another FuncExpr, so
that tells us we don't have a mergeable list. Why would this not be enough?
It's not a question about whether it's possible to implement this, but
about whether it makes sense. In case of plain constants it's
straightforward -- they will not change anything meaningfully and hence
could be squashed from the query. Now for a function, that might return
different values for the same set of constant arguments, it's much less
obvious and omitting such expressions might have unexpected consequences.
query jumbling should not care about the behavior of the function. If we
take a regular call to a volatile function, we will generate the same
queryId for
every call regardless of the input to the function. Why does the in-list case
need to care about the volatility of the function?
The way I see it is we need to merge constants that are either simple
or potentially wrapped in a cast.
We can detect functions that are explicitly called ( and potentially
wrapped in a cast),
and we ought to skip merging in that situation.
See the attached 0001-experiement-on-top-of-v27.patch
which applies on top of v27 and produces the results like below.
Btw, if you would like to share a code delta, please do not post it as a
patch or diff. This hijacks the CI pipeline, because CFbot thinks that's
a new version of the original patch.
You're right. Sorry about that.
--
Sami
On Tue, Feb 18, 2025 at 08:48:43AM -0600, Sami Imseih wrote:
Btw, if you would like to share a code delta, please do not post it as a
patch or diff. This hijacks the CI pipeline, because CFbot thinks that's
a new version of the original patch.
You're right. Sorry about that.
Exchanging patches can be a good idea to show what you're thinking on
a code-basis. If you post something as one or more .txt files, the CF
app should ignore what you send.
--
Michael
On 2025-Feb-18, Sami Imseih wrote:
It's not a question about whether it's possible to implement this,
but about whether it makes sense. In case of plain constants it's
straightforward -- they will not change anything meaningfully and
hence could be squashed from the query. Now for a function, that
might return different values for the same set of constant
arguments, it's much less obvious and omitting such expressions
might have unexpected consequences.
Query jumbling should not care about the behavior of the function. If
we take a regular call to a volatile function, we will generate the
same queryId for every call regardless of the input to the function.
Why does the in-list case need to care about the volatility of the
function?
I feel quite insecure about this idea TBH. At least with immutable
functions I don't expect the system to behave wildly different than with
actual constants. What non-immutable functions do you have in mind that
would be useful to fold as if they were constants in the IN list in such
a query?
In the meantime, here's v28 which is Dmitry's v27 plus pgindent. No
other changes. Dmitry, were you planning to submit a new version?
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"The problem with the future is that it keeps turning into the present"
(Hobbes)
Attachments:
v28-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patchtext/x-diff; charset=utf-8Download
From 05e8d7a960ebaf12e1a5798fb1a8941b55771fcc Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v28] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_squash_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih, Julien Rouhaud
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/merging.out | 465 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 62 ++-
contrib/pg_stat_statements/sql/merging.sql | 180 +++++++
doc/src/sgml/config.sgml | 28 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/nodes/gen_node_support.pl | 21 +-
src/backend/nodes/queryjumblefuncs.c | 169 ++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/nodes/nodes.h | 3 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 8 +-
15 files changed, 959 insertions(+), 25 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/merging.out
create mode 100644 contrib/pg_stat_statements/sql/merging.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587bc..eef8d69cc45 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions merging
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..ecf0a66a6b6
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,465 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 1
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) | 4
+ SELECT * FROM test_merge WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */) AND data = $2 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */)+| 3
+ AND data IN ($2 /*, ... */) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_merge WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_jsonb WHERE data IN +| 1
+ (($1 /*, ... */)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 /*, ... */::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c58..8a96aff625b 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'merging',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index b245d04097d..bebf0e8f1d2 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -296,7 +296,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2823,6 +2822,13 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool merged_interval = false; /* Currently processed constants
+ * belong to a merged constants
+ * interval. */
+ int skipped_constants = 0; /* To adjust positions of visible
+ * constants in the presence of a
+ * merged constants interval. */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2861,13 +2867,57 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id -
+ skipped_constants);
+
+ /* In case previous constants were merged away, stop doing that */
+ merged_interval = false;
+ }
+ else if (!merged_interval)
+ {
+ /*
+ * We are not inside a merged interval yet, which means it is the
+ * first merged constant.
+ *
+ * A merged constants interval must be represented via two
+ * constants with the merged flag. Currently we are at the first,
+ * verify there is another one.
+ */
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].merged);
+
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Remember to skip until a non merged constant appears */
+ merged_interval = true;
+
+ /* Mark the interval in the normalized query */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
+ i + 1 + jstate->highest_extern_param_id -
+ skipped_constants);
+
+ skipped_constants++;
+ }
+ else
+ {
+ /*
+ * If it's a merged constant during a merged_interval, it has to
+ * close it.
+ */
+ merged_interval = false;
+ }
+
+ /* Otherwise the constant is merged away, move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..282466f9b9a
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,180 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Multiple merged intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if merged, even though
+-- the normalized query will be the same
+SELECT * FROM test_merge WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_merge WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is merged
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is merged
+CREATE TABLE test_merge_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not merged
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_merge_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_merge_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be merged as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e55700f35b8..710c52ba0d8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8527,6 +8527,34 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how an array of constants (e.g. for an <literal>IN</literal>
+ clause) contributes to the query identifier computation. Normally every
+ element of an array contributes to the query identifier, which means the
+ same query will get multiple different identifiers, one for each
+ occurrence with an array of different length.
+
+ If this parameter is on, an array of constants will contribute only the
+ first and the last elements to the query identifier. It means two
+ occurrences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
+ Such queries are represented in form <literal>'($1 /*, ... */)'</literal>.
+
+ The parameter could be used to reduce amount of repeating data stored
+ via <xref linkend="pgstatstatements"/>. Only constants are affected,
+ bind parameters cannot benefit from this functionality. The default
+ value is <literal>off</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index e2ac1c2d501..f6ddafbd85c 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -630,11 +630,28 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ single <structname>pg_stat_statements</structname> entry. Normally this
+ will happen only for semantically equivalent queries, or if
+ <varname>query_id_squash_values</varname> is enabled and the only difference
+ between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN ($1 /*, ... */)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
</para>
<para>
@@ -965,6 +982,7 @@
</para>
</listitem>
</varlistentry>
+
</variablelist>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0ae..c4216648794 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_merge
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_merge = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_merge')
+ {
+ $query_jumble_merge = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Merge constants if requested.
+ if ($query_jumble_merge)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
# Track the node's location only if directly requested.
if ($query_jumble_location)
{
- print $jff "\tJUMBLE_LOCATION($f);\n"
+ print $jff "\tJUMBLE_LOCATION($f, false);\n"
unless $query_jumble_ignore;
}
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a281936..f8dd2cfd327 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,127 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].merged = merged;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Verify few simple cases where we can deduce that the expression is a
+ * constant:
+ *
+ * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check if the function is an immutable builtin
+ * function doing implicit cast with constant arguments.
+ * - Otherwise test if the expression is a simple Const.
+ *
+ * We could also handle some simple OpExpr here as well, but since such queries
+ * will also have opno jumbled, this might lead to a confusing situation where
+ * two different queries end up with the same normalized query but different
+ * query_id.
+ *
+ * The argument known_immutable_funcs contains known function OIDs that were
+ * already proven to be immutable. If the expression to verify is a FuncExpr,
+ * we first check this list, and only if not found, test the function
+ * volatility and store the result back. Since most of the time constants
+ * merging will be dealing with same type of expressions, this avoids
+ * performing func_volatile over and over for the same functions.
+ *
+ * Note that we intentionally do not recurse on the function arguments and only
+ * test them for being Const expression for simplicity.
+ */
+static bool
+IsMergeableConst(Node *element, List **known_immutable_funcs)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if (IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (!list_member_oid(*known_immutable_funcs, func->funcid))
+ {
+ /* Not found in the cache, verify and add if needed */
+ if (func_volatile(func->funcid) != PROVOLATILE_IMMUTABLE)
+ return false;
+
+ *known_immutable_funcs = lappend_oid(*known_immutable_funcs,
+ func->funcid);
+ }
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Verify if the provided list could be merged down, which means it contains
+ * only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /* To keep track of immutable functions in elements */
+ List *immutable_funcs = NIL;
+
+ /* A mergeable list needs to contain at least two elements */
+ if (elements == NIL || list_length(elements) < 2)
+ return false;
+
+ if (!query_id_squash_values)
+ {
+ /* Merging is disabled, process everything one by one */
+ return false;
+ }
+
+ foreach(temp, elements)
+ {
+ if (!IsMergeableConst(lfirst(temp), &immutable_funcs))
+ return false;
+ }
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+ RecordConstLocation(jstate, expr->location, merged)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +364,36 @@ do { \
#include "queryjumblefuncs.funcs.c"
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first,
+ *last;
+
+ if (IsMergeableConstList(elements, &first, &last))
+ {
+ /*
+ * Both first and last constants have to be recorded. The first one
+ * will indicate the merged interval, the last one will tell us the
+ * length of the interval within the query text.
+ *
+ * Note that for the last expression we actually need not the
+ * expression location (which is the leftmost expression), but where
+ * it ends. For the limited set of supported cases now (implicit
+ * coerce via FuncExpr, Const) it's fine to use exprLocation, but if
+ * more complex composite expressions will be supported, e.g. OpExpr
+ * or FuncExpr as an explicit call, the rightmost expression will be
+ * needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -375,5 +530,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
if (expr->jumble_args)
JUMBLE_NODE(args);
JUMBLE_FIELD(is_local);
- JUMBLE_LOCATION(location);
+ JUMBLE_LOCATION(location, false);
}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 47375e5bfaa..575d592c09f 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index ad25cbb39c5..69f0308d67a 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2118,6 +2118,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 5362ff80519..f2058f31ca9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -649,12 +649,12 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
#------------------------------------------------------------------------------
# VACUUMING
#------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab1..56563025446 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
+ * - query_jumble_merge: Allow to merge the field values for the query
+ * jumbling.
+ *
* - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
*
* - read_write_ignore: Ignore the field for read/write. This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index d0576da3e25..fea8ef30359 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_merge);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb9566587..d2f1c1e3105 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates the constant represents the beginning or the end of a merged
+ * constants interval.
+ */
+ bool merged;
} LocationLen;
/*
@@ -62,12 +68,12 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
--
2.39.5
On Mon, Mar 03, 2025 at 12:56:24PM GMT, Álvaro Herrera wrote:
In the meantime, here's v28 which is Dmitry's v27 plus pgindent. No
other changes. Dmitry, were you planning to submit a new version?
Nope, there was nothing I wanted to add so far.
It's not a question about whether it's possible to implement this,
but about whether it makes sense. In case of plain constants it's
straightforward -- they will not change anything meaningfully and
hence could be squashed from the query. Now for a function, that
might return different values for the same set of constant
arguments, it's much less obvious and omitting such expressions
might have unexpected consequences.
query jumbling should not care about the behavior of the function. If
we take a regular call to a volatile function, we will generate the
same queryId for every call regardless of the input to the function.
Why does the in-list case need to care about the volatility of the
function?
I feel quite insecure about this idea TBH. At least with immutable
functions I don't expect the system to behave wildly different than with
actual constants. What non-immutable functions do you have in mind that
would be useful to fold as if they were constants in the IN list in such
a query?
I don't have an example of non-immutable functions that should be folded,
but I also don't think query jumbling should care about the
function's volatility at all.
When dealing with FuncExpr, Why can't we simply skip merging when
func->funcformat != COERCE_IMPLICIT_CAST and the arguments
to the function are not constants? Meaning that anytime we have either
explicit casts or explicit function calls, this makes the list not eligible
for merging. See the attached .txt file to demonstrate what I am thinking.
This attached passes all the v28 test cases, but we will need to add tests
for implicit casts and external parameters.
--
Sami
Attachments:
On Mon, 3 Mar 2025 at 17:26, Álvaro Herrera <alvherre@alvh.no-ip.org> wrote:
On 2025-Feb-18, Sami Imseih wrote:
It's not a question about whether it's possible to implement this,
but about whether it makes sense. In case of plain constants it's
straightforward -- they will not change anything meaningfully and
hence could be squashed from the query. Now for a function, that
might return different values for the same set of constant
arguments, it's much less obvious and omitting such expressions
might have unexpected consequences.
query jumbling should not care about the behavior of the function. If
we take a regular call to a volatile function, we will generate the
same queryId for every call regardless of the input to the function.
Why does the in-list case need to care about the volatility of the
function?
I feel quite insecure about this idea TBH. At least with immutable
functions I don't expect the system to behave wildly different than with
actual constants. What non-immutable functions do you have in mind that
would be useful to fold as if they were constants in the IN list in such
a query?
In the meantime, here's v28 which is Dmitry's v27 plus pgindent. No
other changes. Dmitry, were you planning to submit a new version?
I noticed that the feedback from Sami at [1]/messages/by-id/CAA5RZ0vt29Om+tKFOcUNhXV+kKpNnj0yj6OFho3-wngcMHWnAQ@mail.gmail.com has not yet been
addressed, I have changed the status to Waiting on Author, kindly
address them and update the status to Needs review.
[1]: /messages/by-id/CAA5RZ0vt29Om+tKFOcUNhXV+kKpNnj0yj6OFho3-wngcMHWnAQ@mail.gmail.com
Regards,
Vignesh
On Mon, Mar 17, 2025 at 12:07:44PM GMT, vignesh C wrote:
I noticed that the feedback from Sami at [1] has not yet been
addressed, I have changed the status to Waiting on Author, kindly
address them and update the status to Needs review.
[1] - /messages/by-id/CAA5RZ0vt29Om+tKFOcUNhXV+kKpNnj0yj6OFho3-wngcMHWnAQ@mail.gmail.com
I'm afraid there is a disagreement about this part of the feedback. I'm
not yet convinced about the idea suggested over there (treating mutable
functions in the same way as constants) and not planning to change
anything, at least not in the current version of the patch.
On Mon, 17 Mar 2025 at 13:42, Dmitry Dolgov <9erthalion6@gmail.com> wrote:
On Mon, Mar 17, 2025 at 12:07:44PM GMT, vignesh C wrote:
I noticed that the feedback from Sami at [1] has not yet been
addressed, I have changed the status to Waiting on Author, kindly
address them and update the status to Needs review.
[1] - /messages/by-id/CAA5RZ0vt29Om+tKFOcUNhXV+kKpNnj0yj6OFho3-wngcMHWnAQ@mail.gmail.com
I'm afraid there is a disagreement about this part of the feedback. I'm
not yet convinced about the idea suggested over there (treating mutable
functions in the same way as constants) and not planning to change
anything, at least not in the current version of the patch.
@Sami Imseih Do you have any other suggestions to solve this?
Others: Any thoughts on which way is better in this case?
Regards,
Vignesh
On 2025-Mar-17, Dmitry Dolgov wrote:
I'm afraid there is a disagreement about this part of the feedback. I'm
not yet convinced about the idea suggested over there (treating mutable
functions in the same way as constants) and not planning to change
anything, at least not in the current version of the patch.
I have to admit that I am leaning towards removing the immutability
constraint. The reason is that we already require the function to be
bootstrapped (due to the OID test) and to have implicit cast form, so
that limits which functions are recognized; the only ones there that are
not immutable are:
castsource │ casttarget │ castfunc
─────────────────────────────┼──────────────────────────┼──────────────────────────────────────────
text │ regclass │ regclass(text)
character varying │ regclass │ regclass(text)
date │ timestamp with time zone │ timestamptz(date)
time without time zone │ time with time zone │ timetz(time without time zone)
timestamp without time zone │ timestamp with time zone │ timestamptz(timestamp without time zone)
Looking at this list, it seems rather random to me to say that we should
not squash arrays with types using these casts. Should we really
consider two queries to be different because they run with different
search_path or TimeZone settings?
But kindly do not submit a new version of the patch, as I already have
some changes of my own (mostly on removing the term "merge" from code
and comments to replace with "squash", as well as adding some more
comments). I'll post it soon.
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
On 2025-Feb-14, Dmitry Dolgov wrote:
This should do it. The last patch for today, otherwise I'll probably add
more bugs than features :)
Thank you. I've spent some time with this patch in the last few days,
and I propose a few changes. I renamed everything from "merge" to
"squash"; apart from that, it's mostly docs and code comments changes,
but I also removed the addition of a boolean argument to JUMBLE_LOCATION
which AFAICS is unnecessary, and did away with the business of checking
for function immutability. I also changed the code layout of
generate_normalized_query(); no functional changes, I just reordered the
code blocks (which caused a couple of lines to appear repeated that
weren't before).
You can see my patch on top of yours here:
https://github.com/alvherre/postgres/commits/query_id_squash_values/
and the CI run here:
https://cirrus-ci.com/build/5660053472018432
In addition, here I attach the complete patch on top of current master.
Unless there's some opposition to this, I intend to push this tomorrow.
I have to admit that looking at this part of the test,
+SELECT * FROM test_squash_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_squash_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
and
+SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1
I am tempted to say that explicit casts should also be considered
squashable (that is, in IsSquashableConst() also allow the case of
func->funcformat == COERCE_EXPLICIT_CAST). That would also squash
queries such as this one:
+SELECT * FROM test_squash_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
I, frankly, see little argument for making a distinction here. We can
still discuss whether we prefer it one way or the other; we don't need
that decision to prevent me from pushing the patch I here attach, I think.
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"The saddest aspect of life right now is that science gathers knowledge faster
than society gathers wisdom." (Isaac Asimov)
Attachments:
v29-0001-Prevent-jumbling-of-every-element-in-ArrayExpr.patch (text/x-diff; charset=utf-8) [Download]
From aef36d3822ed6e9e5cfd709c452109f7de793894 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v29] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.
Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_squash_values with the default value off.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei,
Sami Imseih, Julien Rouhaud
Tested-by: Chengxi Sun, Yasuo Honda
---
contrib/pg_stat_statements/Makefile | 2 +-
.../pg_stat_statements/expected/squashing.out | 465 ++++++++++++++++++
contrib/pg_stat_statements/meson.build | 1 +
.../pg_stat_statements/pg_stat_statements.c | 76 ++-
contrib/pg_stat_statements/sql/squashing.sql | 180 +++++++
doc/src/sgml/config.sgml | 29 ++
doc/src/sgml/pgstatstatements.sgml | 26 +-
src/backend/nodes/gen_node_support.pl | 19 +-
src/backend/nodes/queryjumblefuncs.c | 145 +++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/utils/misc/guc_tables.c | 10 +
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/include/nodes/nodes.h | 2 +
src/include/nodes/primnodes.h | 2 +-
src/include/nodes/queryjumble.h | 7 +
15 files changed, 945 insertions(+), 23 deletions(-)
create mode 100644 contrib/pg_stat_statements/expected/squashing.out
create mode 100644 contrib/pg_stat_statements/sql/squashing.sql
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 241c02587bc..b2bd8794d2a 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = select dml cursors utility level_tracking planning \
user_activity wal entry_timestamp privileges extended \
- parallel cleanup oldextversions
+ parallel cleanup oldextversions squashing
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
new file mode 100644
index 00000000000..f7bcba2388c
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -0,0 +1,465 @@
+--
+-- Const squashing functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_squash (id int, data int);
+-- IN queries
+-- No squashing is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash WHERE id IN (1);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1
+ SELECT * FROM test_squash WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 4
+ SELECT * FROM test_squash WHERE id IN ($1) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) AND data = $2 | 3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Multiple squashed intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)+| 3
+ AND data IN ($2 /*, ... */) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if squashed, even though
+-- the normalized query will be the same
+SELECT * FROM test_squash WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN +| 1
+ ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |
+ SELECT * FROM test_squash WHERE id IN +| 1
+ (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- FuncExpr
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT data FROM test_float WHERE data IN (1, 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1, '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', 2);
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN ('1', '2');
+ data
+------
+(0 rows)
+
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+ data
+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------+-------
+ SELECT data FROM test_float WHERE data IN ($1 /*, ... */) | 5
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Numeric type, implicit cast is squashed
+CREATE TABLE test_squash_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-----------------------------------------------------------------+-------
+ SELECT * FROM test_squash_numeric WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, implicit cast is squashed
+CREATE TABLE test_squash_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_squash_bigint WHERE data IN ($1 /*, ... */) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, explicit cast is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash_bigint WHERE data IN +| 1
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+|
+ $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------------------------+-------
+ SELECT * FROM test_squash_bigint WHERE id IN +| 1
+ (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+|
+ abs($8), abs($9), abs($10), ((abs($11)))) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_squash_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_squash_jsonb WHERE data IN +| 1
+ ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+|
+ (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+|
+ (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+|
+ (SELECT $10)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- CoerceViaIO
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: return type casttesttype is only a shell
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+NOTICE: argument type casttesttype is only a shell
+LINE 1: CREATE FUNCTION casttesttype_out(casttesttype)
+ ^
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+CREATE TABLE test_squash_cast (id int, data casttesttype);
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_squash_cast WHERE data IN +| 1
+ ($1 /*, ... */::int4::casttesttype) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT * FROM test_squash_jsonb WHERE data IN +| 1
+ (($1 /*, ... */)::jsonb) |
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_squash
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+-- Simple array would be squashed as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ array
+------------------------
+ {1,2,3,4,5,6,7,8,9,10}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */] | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+RESET query_id_squash_values;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 4446af58c58..01a6cbdcf61 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
'parallel',
'cleanup',
'oldextversions',
+ 'squashing',
],
'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
# Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index b245d04097d..8ab9ad58e1c 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -296,7 +296,6 @@ static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -2823,6 +2822,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool in_squashed = false; /* in a run of squashed consts? */
+ int skipped_constants = 0; /* Position adjustment of later
+ * constants after squashed ones */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2836,6 +2839,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
* certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
* could refine that limit based on the max value of n for the current
* query, but it hardly seems worth any extra effort to do so.
+ *
+ * Note this also gives enough room for the commented-out ", ..." list
+ * syntax used by constant squashing.
*/
norm_query_buflen = query_len + jstate->clocations_count * 10;
@@ -2848,6 +2854,7 @@ generate_normalized_query(JumbleState *jstate, const char *query,
tok_len; /* Length (in bytes) of that tok */
off = jstate->clocations[i].location;
+
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2856,18 +2863,67 @@ generate_normalized_query(JumbleState *jstate, const char *query,
if (tok_len < 0)
continue; /* ignore any duplicates */
- /* Copy next chunk (what precedes the next constant) */
- len_to_wrt = off - last_off;
- len_to_wrt -= last_tok_len;
+ /*
+ * What to do next depends on whether we're squashing constant lists,
+ * and whether we're already in a run of such constants.
+ */
+ if (!jstate->clocations[i].squashed)
+ {
+ /*
+ * This location corresponds to a constant not to be squashed.
+ * Print what comes before the constant ...
+ */
+ len_to_wrt = off - last_off;
+ len_to_wrt -= last_tok_len;
- Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
+ Assert(len_to_wrt >= 0);
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+ /* ... and then a param symbol replacing the constant itself */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id - skipped_constants);
+
+ /* In case previous constants were merged away, stop doing that */
+ in_squashed = false;
+ }
+ else if (!in_squashed)
+ {
+ /*
+ * This location is the start position of a run of constants to be
+ * squashed, so we need to print the representation of starting a
+ * group of stashed constants.
+ *
+ * Print what comes before the constant ...
+ */
+ len_to_wrt = off - last_off;
+ len_to_wrt -= last_tok_len;
+ Assert(len_to_wrt >= 0);
+ Assert(i + 1 < jstate->clocations_count);
+ Assert(jstate->clocations[i + 1].squashed);
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* ... and then start a run of squashed constants */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
+ i + 1 + jstate->highest_extern_param_id - skipped_constants);
+
+ /* The next location will match the block below, to end the run */
+ in_squashed = true;
+
+ skipped_constants++;
+ }
+ else
+ {
+ /*
+ * The second location of a run of squashable elements; this
+ * indicates its end.
+ */
+ in_squashed = false;
+ }
+
+ /* Otherwise the constant is squashed away -- move forward */
quer_loc = off + tok_len;
last_off = off;
last_tok_len = tok_len;
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
new file mode 100644
index 00000000000..56ee8ccb9a1
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -0,0 +1,180 @@
+--
+-- Const squashing functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_squash (id int, data int);
+
+-- IN queries
+
+-- No squashing is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_squash_values = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash WHERE id IN (1);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) AND data = 2;
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) AND data = 2;
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) AND data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Multiple squashed intervals
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+ AND data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification for OpExpr
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+-- In the following two queries the operator expressions (+) and (@) have
+-- different oppno, and will be given different query_id if squashed, even though
+-- the normalized query will be the same
+SELECT * FROM test_squash WHERE id IN
+ (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT * FROM test_squash WHERE id IN
+ (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9');
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- FuncExpr
+
+-- Verify multiple type representation end up with the same query_id
+CREATE TABLE test_float (data float);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT data FROM test_float WHERE data IN (1, 2);
+SELECT data FROM test_float WHERE data IN (1, '2');
+SELECT data FROM test_float WHERE data IN ('1', 2);
+SELECT data FROM test_float WHERE data IN ('1', '2');
+SELECT data FROM test_float WHERE data IN (1.0, 1.0);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type, implicit cast is squashed
+CREATE TABLE test_squash_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, implicit cast is squashed
+CREATE TABLE test_squash_bigint (id int, data bigint);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, explicit cast is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_bigint WHERE data IN
+ (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+ 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_bigint WHERE id IN
+ (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+ abs(800), abs(900), abs(1000), ((abs(1100))));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const
+CREATE TABLE test_squash_jsonb (id int, data jsonb);
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_jsonb WHERE data IN
+ ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+ (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+ (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+ (SELECT '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+
+-- Create some dummy type to force CoerceViaIO
+CREATE TYPE casttesttype;
+
+CREATE FUNCTION casttesttype_in(cstring)
+ RETURNS casttesttype
+ AS 'textin'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE FUNCTION casttesttype_out(casttesttype)
+ RETURNS cstring
+ AS 'textout'
+ LANGUAGE internal STRICT IMMUTABLE;
+
+CREATE TYPE casttesttype (
+ internallength = variable,
+ input = casttesttype_in,
+ output = casttesttype_out,
+ alignment = int4
+);
+
+CREATE CAST (int4 AS casttesttype) WITH INOUT;
+
+CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype)
+returns boolean language sql immutable as $$
+ SELECT true
+$$;
+
+CREATE OPERATOR = (
+ leftarg = casttesttype,
+ rightarg = casttesttype,
+ procedure = casttesttype_eq,
+ commutator = =);
+
+CREATE TABLE test_squash_cast (id int, data casttesttype);
+
+-- Use the introduced type to construct a list of CoerceViaIO around Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_cast WHERE data IN
+ (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+ 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+ 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+ 10::int4::casttesttype, 11::int4::casttesttype);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_jsonb WHERE data IN
+ (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+ ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+ ( '"9"')::jsonb, ( '"10"')::jsonb);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- RelabelType
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation in a CTE, which was causing issues in the past
+WITH cte AS (
+ SELECT 'const' as const FROM test_squash
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+-- Simple array would be squashed as well
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET query_id_squash_values;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3d62c8bd274..3140d922e0b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8626,6 +8626,35 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
+ <varlistentry id="guc-query-id-squash-values" xreflabel="query_id_squash_values">
+ <term><varname>query_id_squash_values</varname> (<type>bool</type>)
+ <indexterm>
+ <primary><varname>query_id_squash_values</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how a list of constants (e.g., for an <literal>IN</literal>
+ clause) contributes to the query identifier computation.
+ Normally, every element of such a list contributes to the query
+ identifier separately, which means that two queries that only differ
+ in the number of elements in such a list would get different query
+ identifiers.
+ If this parameter is on, a list of constants will not contribute
+ to the query identifier. This means that two queries whose only
+ difference is the number of constants in such a list are going to get the
+ same query identifier.
+ </para>
+ <para>
+ Only constants are affected; bind parameters do not benefit from this
+ functionality. The default value is <literal>off</literal>.
+ </para>
+ <para>
+ This parameter also affects how <xref linkend="pgstatstatements"/>
+ generates normalized query texts.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-log-statement-stats">
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index e2ac1c2d501..c4e04cbb7f8 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -630,11 +630,29 @@
<para>
In some cases, queries with visibly different texts might get merged into a
- single <structname>pg_stat_statements</structname> entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
+ single <structname>pg_stat_statements</structname> entry; as explained above,
+ this is expected to happen for semantically equivalent queries.
+ In addition, if <varname>query_id_squash_values</varname> is enabled
+ and the only difference between queries is the number of elements in a list
+ of constants, the list will get squashed down to a single element but shown
+ with a commented-out list indicator:
+
+<screen>
+=# SET query_id_squash_values = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8);
+=# SELECT query, calls FROM pg_stat_statements
+ WHERE query LIKE 'SELECT%';
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN ($1 /*, ... */)
+calls | 2
+</screen>
+
+ In addition to these cases, there is a small chance of hash collisions
+ causing unrelated queries to be merged into one entry.
(This cannot happen for queries belonging to different users or databases,
- however.)
+ however.)
</para>
<para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0ae..7e3f335ac09 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_squash
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_squash = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,14 +1297,27 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_squash')
+ {
+ $query_jumble_squash = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Squash constants if requested.
+ if ($query_jumble_squash)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a281936..b2394066404 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool merged);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * 'squashed' signals that the constant represents the first or the last
+ * element in a series of merged constants. No element of such a series is
+ * added to the jumble hash; only these two locations are recorded.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool squashed)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,98 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].squashed = squashed;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Subroutine for _jumbleElements: Verify a few simple cases where we can
+ * deduce that the expression is a constant:
+ *
+ * - Ignore a possible wrapping RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check that the function is an implicit
+ * cast and its arguments are Const.
+ * - Otherwise test if the expression is a simple Const.
+ */
+static bool
+IsSquashableConst(Node *element)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if (IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST)
+ return false;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const)) /* XXX we could recurse here instead */
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Subroutine for _jumbleElements: Verify whether the provided list
+ * can be squashed, meaning it contains only constant expressions.
+ *
+ * Return value indicates if squashing is possible.
+ *
+ * Note that this function searches only for explicit Const nodes with
+ * possibly very simple decorations on top, and does not try to simplify
+ * expressions.
+ */
+static bool
+IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /*
+ * If squashing is disabled, or the list is too short, we don't try to
+ * squash it.
+ */
+ if (!query_id_squash_values || list_length(elements) < 2)
+ return false;
+
+ foreach(temp, elements)
+ {
+ if (!IsSquashableConst(lfirst(temp)))
+ return false;
+ }
+
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+ RecordConstLocation(jstate, expr->location, false)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +335,45 @@ do { \
#include "queryjumblefuncs.funcs.c"
+/*
+ * When query_id_squash_values is enabled, we jumble lists of constant
+ * elements as one individual item regardless of how many elements are
+ * in the list. This means different queries jumble to the same query_id,
+ * if the only difference is the number of elements in the list.
+ *
+ * If query_id_squash_values is disabled or the list is not "simple
+ * enough", we jumble each element normally.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first,
+ *last;
+
+ if (IsSquashableConstList(elements, &first, &last))
+ {
+ /*
+ * If this list of elements is squashable, keep track of the location
+ * of its first and last elements. When reading back the locations
+ * array, we'll see two consecutive locations with ->squashed set to
+ * true, indicating the location of initial and final elements of this
+ * list.
+ *
+ * For the limited set of cases we support now (implicit coerce via
+ * FuncExpr, Const) it's fine to use exprLocation of the 'last'
+ * expression, but if more complex composite expressions are to be
+ * supported (e.g., OpExpr or FuncExpr as an explicit call), more
+ * sophisticated tracking will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 77fb877dbad..78d8b652e04 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -775,6 +776,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1035,6 +1037,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 9c0b10ad4dc..fb7515a3156 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2118,6 +2118,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 8de86e0c945..c223338302c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -652,6 +652,7 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab1..d18044b4e65 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -105,6 +105,8 @@ typedef enum NodeTag
* that typmod and collation information are usually irrelevant for the
* query jumbling.
*
+ * - query_jumble_squash: Squash multiple values during query jumbling.
+ *
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index d0576da3e25..7d3b4198f26 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_squash);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb9566587..905f66bc0bd 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates that this location represents the beginning or end of a run
+ * of squashed constants.
+ */
+ bool squashed;
} LocationLen;
/*
@@ -68,6 +74,7 @@ extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either
--
2.39.5
On 2025-Mar-17, Álvaro Herrera wrote:
You can see my patch on top of yours here:
https://github.com/alvherre/postgres/commits/query_id_squash_values/
and the CI run here:
https://cirrus-ci.com/build/5660053472018432
Heh, this blew up on bogus SGML markup :-( Fixed and running again:
https://cirrus-ci.com/build/4822893680394240
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
"Once again, thank you and all of the developers for your hard work on
PostgreSQL. This is by far the most pleasant management experience of
any database I've worked on." (Dan Harris)
http://archives.postgresql.org/pgsql-performance/2006-04/msg00247.php
On Mon, Mar 17, 2025 at 08:14:16PM GMT, Álvaro Herrera wrote:
On 2025-Mar-17, Álvaro Herrera wrote:
You can see my patch on top of yours here:
https://github.com/alvherre/postgres/commits/query_id_squash_values/
and the CI run here:
https://cirrus-ci.com/build/5660053472018432
Heh, this blew up on bogus SGML markup :-( Fixed and running again:
https://cirrus-ci.com/build/4822893680394240
Thanks, much appreciated! I've inspected the diff between patches and run few
tests, at the first glance everything looks fine.
I am tempted to say that explicit casts should also be considered
squashable (that is, in IsSquashableConst() also allow the case of
func->funcformat == COERCE_EXPLICIT_CAST).
Well, I admit I may have been burned too much by the initial reception of the
patch and handled it too conservatively in this regard. Originally I also had a
concern about normalized queries representation for explicit cast case, but it
was resolved by Julien's suggestion to switch to the /* ... */ format.
I have to admit that I am leaning towards removing the immutability
constraint. The reason is that we already require the function to be
bootstrapped (due to the OID test) and to have implicit cast form, so
that limits which functions are recognized; the only ones there that are
not immutable are:
castsource │ casttarget │ castfunc
─────────────────────────────┼──────────────────────────┼──────────────────────────────────────────
text │ regclass │ regclass(text)
character varying │ regclass │ regclass(text)
date │ timestamp with time zone │ timestamptz(date)
time without time zone │ time with time zone │ timetz(time without time zone)
timestamp without time zone │ timestamp with time zone │ timestamptz(timestamp without time zone)
Agree, when put together with the OID limitation it doesn't look so bad.
Somehow I was thinking about the Sami's proposal and the discussion in more
abstract terms, as if we talk about any arbitrary mutable functions to squash
-- I still would be cautious about hiding non-bootstrapped mutable functions.
On 2025-Mar-18, Dmitry Dolgov wrote:
Thanks, much appreciated! I've inspected the diff between patches and
run few tests, at the first glance everything looks fine.
Thanks for looking once more.
I am tempted to say that explicit casts should also be considered
squashable (that is, in IsSquashableConst() also allow the case of
func->funcformat == COERCE_EXPLICIT_CAST).
Well, I admit I may have been burned too much by the initial reception
of the patch and handled it too conservatively in this regard.
I can totally understand that. I have added this and pushed. Hopefully
nobody will hate this too much, and some people might even like it.
By the way, I'm still open to adding the 'powers' mechanism that was
discussed earlier and other simple additions on top of what's there now,
if you have some spare energy to spend on this. (For full disclosure:
the "powers" thing was discussed in a developer's meeting last year, and
several people said they'd prefer to stick with 0001 and forget about
powers, on the grounds that it produced query texts that were weird and
invalid SQL. But now that we have the commented-out syntax, it's no
longer invalid SQL, and maybe it's not so weird either.)
But there are other proposals such as handling Params and whatnot. At
this point, what we'd need for that is a more extensive test suite to
show that we're not breaking other things ...
Agree, when put together with the OID limitation it doesn't look so bad.
Somehow I was thinking about the Sami's proposal and the discussion in more
abstract terms,
Yeah, that happened to me too, and then I checked again and realized
that my initial impression was wrong.
as if we talk about any arbitrary mutable functions to squash -- I
still would be cautious about hiding non-bootstrapped mutable
functions.
Yeah, absolutely.
One thing I noticed while going over the code, is that we say in some
code comments that the list of elements only contributes the first and
last elements to the jumble. But this is not true -- the list actually
contributes _nothing at all_ to the jumble. I don't think this causes
any terrible problems, but maybe somebody will discover that I'm wrong
on that. This isn't trivial to solve, because if you try to add
anything to the jumble from there, you'd break the first/last location
pair matching. We could maybe fix this by returning the actual
bottommost Const node from IsSquashableConstList() instead of whatever
is wrapping it, and then arrange for _jumbleConst() to receive a boolean
that turns off jumbling of the location.
However, contributing nothing already makes such a query different from
another query that has exactly one element, because that one jumbles
that element. It could only be confused (in the sense of identical
query_ids) with another list that has zero elements.
Anyway, something to play with.
BTW, it's fun to execute a query that's literally
select col from tab where col in (1 /*, ... */);
and then
select col from tab where col in (1, 2);
because now you have two entries in pg_stat_statements with visibly the
same query text, but two different query_ids. I'm not terribly worried
about this, because who uses a literal "/*, ... */" in a query anyway?
And even if they do, it's easily explained. But jesters could probably
get a good laugh messing about with these reports.
Thanks for keeping at this for so long!
--
Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/
I want to mention that the current patch does not deal
with external parameters ( prepared statements ) [0] [1]. This could be a
follow-up, as it may need some further discussion. It is important to
address this case, IMO.
[0]: /messages/by-id/CAA5RZ0uGfxXyzhp9N5nfsS+ZqF5ngEMC3YtBPtLoeK8EPsjHbw@mail.gmail.com
/messages/by-id/CAA5RZ0uGfxXyzhp9N5nfsS+ZqF5ngEMC3YtBPtLoeK8EPsjHbw@mail.gmail.com
[1]: /messages/by-id/blauoky77sash2qzrbnz6ilfyi7odtvxtdr4ifg4hq4bpqp2uk@6z5yjfsxtcnl
/messages/by-id/blauoky77sash2qzrbnz6ilfyi7odtvxtdr4ifg4hq4bpqp2uk@6z5yjfsxtcnl
Regards,
Sami
On 2025-Mar-18, Sami Imseih wrote:
I want to mention that the current patch does not deal
with external parameters ( prepared statements ) [0] [1]. This could be a
follow-up, as it may need some further discussion. It is important to
address this case, IMO.
Yes, I realize that. Feel free to send a patch.
--
Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
"E pur si muove" (Galileo Galilei)
On Tue, Mar 18, 2025 at 07:33:25PM GMT, Álvaro Herrera wrote:
By the way, I'm still open to adding the 'powers' mechanism that was
discussed earlier and other simple additions on top of what's there now,
if you have some spare energy to spend on this. (For full disclosure:
the "powers" thing was discussed in a developer's meeting last year, and
several people said they'd prefer to stick with 0001 and forget about
powers, on the grounds that it produced query texts that were weird and
invalid SQL. But now that we have the commented-out syntax, it's no
longer invalid SQL, and maybe it's not so weird either.)
But there are other proposals such as handling Params and whatnot. At
this point, what we'd need for that is a more extensive test suite to
show that we're not breaking other things ...
Yes, I'm planning to continue working on this topic, there are still
plenty of things that could be improved.
One thing I noticed while going over the code, is that we say in some
code comments that the list of elements only contributes the first and
last elements to the jumble. But this is not true -- the list actually
contributes _nothing at all_ to the jumble. I don't think this causes
any terrible problems, but maybe somebody will discover that I'm wrong
on that. This isn't trivial to solve, because if you try to add
anything to the jumble from there, you'd break the first/last location
pair matching. We could maybe fix this by returning the actual
bottommost Const node from IsSquashableConstList() instead of whatever
is wrapping it, and then arrange for _jumbleConst() to receive a boolean
that turns off jumbling of the location.
However, contributing nothing already makes such a query different from
another query that has exactly one element, because that one jumbles
that element. It could only be confused (in the sense of identical
query_ids) with another list that has zero elements.
Anyway, something to play with.
Yep, I don't see this as an immediate problem as well, but will do some
experiments with that.
On Tue, Mar 18, 2025 at 02:54:18PM GMT, Sami Imseih wrote:
I want to mention that the current patch does not deal
with external parameters ( prepared statements ) [0] [1]. This could be a
follow-up, as it may need some further discussion. It is important to
address this case, IMO.
Sure, it's important and I'm planning to tackle this next. If you want,
we can collaborate on a patch for that.
Sure, it's important and I'm planning to tackle this next. If you want,
we can collaborate on a patch for that.
I spent some time looking some more at this, and I believe all that needs
to be done is to check for a Param node with a paramkind of PARAM_EXTERN.
During planning the planner turns the Param into a Const during
eval_const_expressions_mutator.
If it's as simple as I think it is, I hope we can get this committed for 18.
If not, and a longer discussion is needed, a new thread can be started
for this.
--
Sami Imseih
Amazon Web Services (AWS)
Attachments:
v1-0001-Allow-query-jumble-to-squash-a-list-external-paramet.patchapplication/octet-stream; name=v1-0001-Allow-query-jumble-to-squash-a-list-external-paramet.patchDownload
From 76afc3c74f222d3bf5551e7b45cf736453ae91c9 Mon Sep 17 00:00:00 2001
From: "Sami Imseih (AWS)"
<simseih@dev-dsk-simseih-1d-3940b79e.us-east-1.amazon.com>
Date: Fri, 21 Mar 2025 05:37:59 +0000
Subject: [PATCH 1/1] Allow query jumble to squash a list of external parameters
62d712ecf now allows query jumbling to squash a list of constants,
but not constants that are passed as external parameters. This patch
now allows the squashing of constant values supplied as external parameters
(e.g., $1, $2), as is the case with prepared statements.
---
.../pg_stat_statements/expected/squashing.out | 38 +++++++++++++++++++
contrib/pg_stat_statements/sql/squashing.sql | 12 ++++++
src/backend/nodes/queryjumblefuncs.c | 20 ++++++++--
3 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index 55aa5109433..370d91642d4 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -333,6 +333,44 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
(2 rows)
+-- Test bind parameters
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_squash_bigint WHERE data IN ($1, $2, $3) \bind 1 2 3
+;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash_bigint WHERE data IN ($1, $2, $3, $4) \bind 1 2 3 4
+;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash_bigint WHERE data IN
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint) \bind 1 2 3 4
+;
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------------------+-------
+ SELECT * FROM test_squash_bigint WHERE data IN ($1 /*, ... */) | 4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
-- CoerceViaIO
-- Create some dummy type to force CoerceViaIO
CREATE TYPE casttesttype;
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 56ee8ccb9a1..3ff251a59ee 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -106,6 +106,18 @@ SELECT * FROM test_squash_jsonb WHERE data IN
(SELECT '"10"')::jsonb);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+-- Test bind parameters
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_squash_bigint WHERE data IN ($1, $2, $3) \bind 1 2 3
+;
+SELECT * FROM test_squash_bigint WHERE data IN ($1, $2, $3, $4) \bind 1 2 3 4
+;
+SELECT * FROM test_squash_bigint WHERE data IN
+ ($1::bigint, $2::bigint, $3::bigint, $4::bigint) \bind 1 2 3 4
+;
+SELECT * FROM test_squash_bigint WHERE data IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
-- CoerceViaIO
-- Create some dummy type to force CoerceViaIO
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 189bfda610a..de405ab08f3 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -244,7 +244,8 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
* - Ignore a possible wrapping RelabelType and CoerceViaIO.
* - If it's a FuncExpr, check that the function is an implicit
* cast and its arguments are Const.
- * - Otherwise test if the expression is a simple Const.
+ * - Otherwise test if the expression is a simple Const or an
+ * external parameter.
*/
static bool
IsSquashableConst(Node *element)
@@ -278,10 +279,21 @@ IsSquashableConst(Node *element)
return true;
}
- if (!IsA(element, Const))
- return false;
+ switch (nodeTag(element))
+ {
+ case T_Const:
+ return true;
+ case T_Param:
+ {
+ Param *param = (Param *) element;
- return true;
+ return param->paramkind == PARAM_EXTERN;
+ }
+ default:
+ break;
+ }
+
+ return false;
}
/*
--
2.39.5 (Apple Git-154)