[PROPOSAL] timestamp informations to pg_stat_statements

Started by Jun Cheol Gimover 9 years ago10 messages
#1Jun Cheol Gim
dialogbox@gmail.com
1 attachment(s)

Hi hackers!

Following is a proposal to add timestamp informations to
`pg_stat_statements`.

# Use case
- If we want to gather list and stats for queries executed at least once
last 1 hour, we had to reset a hours ago. There is no way if we didn't.
- If we found some strange query from `pg_stat_statments`, we might want to
identify when it ran firstly.

If we have timestamp of first and last executed, we can easily gather thess
informations and there are tons of more use cases.

# Implementations
Firstly, I added API version 1.5 to add additional fields and I added two
fields to Counters structure. Now it has 25 fields in total.

```
@@ -156,6 +158,8 @@ typedef struct Counters
  double    blk_read_time;  /* time spent reading, in msec */
  double    blk_write_time; /* time spent writing, in msec */
  double    usage;      /* usage factor */
+ TimestampTz   created;  /* timestamp of created time */
+ TimestampTz   last_updated; /* timestamp of last updated */
 } Counters;

/*
```

The `created` field is filled at the first time the entry will added to
hash table.

```
@@ -1663,6 +1690,8 @@ entry_alloc(pgssHashKey *key, Size query_offset, int
query_len, int encoding,

    /* reset the statistics */
    memset(&entry->counters, 0, sizeof(Counters));
+   /* set the created timestamp */
+    entry->counters.created = GetCurrentTimestamp();
    /* set the appropriate initial usage count */
    entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    /* re-initialize the mutex each time ... we assume no one using it */
```

The `last_updated` will be updated every time `pgss_store()` updates stats.

```
@@ -1251,6 +1256,7 @@ pgss_store(const char *query, uint32 queryId,
    e->counters.blk_read_time +=
INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
    e->counters.blk_write_time +=
INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
    e->counters.usage += USAGE_EXEC(total_time);
+   e->counters.last_updated = GetCurrentTimestamp();

SpinLockRelease(&e->mutex);
}
```

The attached is my first implementation.

Regards,
Jason Kim.

Attachments:

pg_stat_statements_with_timestamp_v1.patchapplication/octet-stream; name=pg_stat_statements_with_timestamp_v1.patchDownload
From 6d8f2d39af797e079bc5e8f8323d4081e4742d5e Mon Sep 17 00:00:00 2001
From: Jason Kim <dialogbox@gmail.com>
Date: Sun, 17 Jul 2016 15:14:39 +0900
Subject: [PATCH] I added columns to track timestamp of first executed and last
 execute. There are many use case of these informations. Without these, we
 have to reset whole stats to gather similar infomations.

---
 contrib/pg_stat_statements/Makefile                |  7 +--
 .../pg_stat_statements--1.4--1.5.sql               | 58 ++++++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements--1.4.sql | 48 ------------------
 .../pg_stat_statements/pg_stat_statements--1.5.sql | 50 +++++++++++++++++++
 contrib/pg_stat_statements/pg_stat_statements.c    | 33 +++++++++++-
 .../pg_stat_statements/pg_stat_statements.control  |  2 +-
 6 files changed, 144 insertions(+), 54 deletions(-)
 create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.4--1.5.sql
 delete mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.4.sql
 create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.5.sql

diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index ddcdb10..20aafaa 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -4,9 +4,10 @@ MODULE_big = pg_stat_statements
 OBJS = pg_stat_statements.o $(WIN32RES)
 
 EXTENSION = pg_stat_statements
-DATA = pg_stat_statements--1.4.sql pg_stat_statements--1.3--1.4.sql \
-	pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.1--1.2.sql \
-	pg_stat_statements--1.0--1.1.sql pg_stat_statements--unpackaged--1.0.sql
+DATA = pg_stat_statements--1.5.sql pg_stat_statements--1.3--1.4.sql \
+	pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.2--1.3.sql \
+	pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.0--1.1.sql \
+	pg_stat_statements--unpackaged--1.0.sql
 PGFILEDESC = "pg_stat_statements - execution statistics of SQL statements"
 
 LDFLAGS_SL += $(filter -lm, $(LIBS))
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4--1.5.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4--1.5.sql
new file mode 100644
index 0000000..c54a735
--- /dev/null
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4--1.5.sql
@@ -0,0 +1,58 @@
+/* contrib/pg_stat_statements/pg_stat_statements--1.4--1.5.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_stat_statements UPDATE TO '1.5'" to load this file. \quit
+
+/* First we have to remove them from the extension */
+ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements;
+ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements(boolean);
+
+/* Then we can drop them */
+DROP VIEW pg_stat_statements;
+DROP FUNCTION pg_stat_statements(boolean);
+
+/* Now redefine */
+CREATE FUNCTION pg_stat_statements_reset()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C PARALLEL SAFE;
+
+CREATE FUNCTION pg_stat_statements(IN showtext boolean,
+    OUT userid oid,
+    OUT dbid oid,
+    OUT queryid bigint,
+    OUT query text,
+    OUT calls int8,
+    OUT total_time float8,
+    OUT min_time float8,
+    OUT max_time float8,
+    OUT mean_time float8,
+    OUT stddev_time float8,
+    OUT rows int8,
+    OUT shared_blks_hit int8,
+    OUT shared_blks_read int8,
+    OUT shared_blks_dirtied int8,
+    OUT shared_blks_written int8,
+    OUT local_blks_hit int8,
+    OUT local_blks_read int8,
+    OUT local_blks_dirtied int8,
+    OUT local_blks_written int8,
+    OUT temp_blks_read int8,
+    OUT temp_blks_written int8,
+    OUT blk_read_time float8,
+    OUT blk_write_time float8,
+    OUT created timestamp with time zone,
+    OUT last_updated timestamp with time zone
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_stat_statements_1_5'
+LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
+
+-- Register a view on the function for ease of use.
+CREATE VIEW pg_stat_statements AS
+  SELECT * FROM pg_stat_statements(true);
+
+GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
deleted file mode 100644
index 58cdf60..0000000
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ /dev/null
@@ -1,48 +0,0 @@
-/* contrib/pg_stat_statements/pg_stat_statements--1.4.sql */
-
--- complain if script is sourced in psql, rather than via CREATE EXTENSION
-\echo Use "CREATE EXTENSION pg_stat_statements" to load this file. \quit
-
--- Register functions.
-CREATE FUNCTION pg_stat_statements_reset()
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C PARALLEL SAFE;
-
-CREATE FUNCTION pg_stat_statements(IN showtext boolean,
-    OUT userid oid,
-    OUT dbid oid,
-    OUT queryid bigint,
-    OUT query text,
-    OUT calls int8,
-    OUT total_time float8,
-    OUT min_time float8,
-    OUT max_time float8,
-    OUT mean_time float8,
-    OUT stddev_time float8,
-    OUT rows int8,
-    OUT shared_blks_hit int8,
-    OUT shared_blks_read int8,
-    OUT shared_blks_dirtied int8,
-    OUT shared_blks_written int8,
-    OUT local_blks_hit int8,
-    OUT local_blks_read int8,
-    OUT local_blks_dirtied int8,
-    OUT local_blks_written int8,
-    OUT temp_blks_read int8,
-    OUT temp_blks_written int8,
-    OUT blk_read_time float8,
-    OUT blk_write_time float8
-)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME', 'pg_stat_statements_1_3'
-LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
-
--- Register a view on the function for ease of use.
-CREATE VIEW pg_stat_statements AS
-  SELECT * FROM pg_stat_statements(true);
-
-GRANT SELECT ON pg_stat_statements TO PUBLIC;
-
--- Don't want this to be available to non-superusers.
-REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.5.sql b/contrib/pg_stat_statements/pg_stat_statements--1.5.sql
new file mode 100644
index 0000000..957706a
--- /dev/null
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.5.sql
@@ -0,0 +1,50 @@
+/* contrib/pg_stat_statements/pg_stat_statements--1.5.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_stat_statements" to load this file. \quit
+
+-- Register functions.
+CREATE FUNCTION pg_stat_statements_reset()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C PARALLEL SAFE;
+
+CREATE FUNCTION pg_stat_statements(IN showtext boolean,
+    OUT userid oid,
+    OUT dbid oid,
+    OUT queryid bigint,
+    OUT query text,
+    OUT calls int8,
+    OUT total_time float8,
+    OUT min_time float8,
+    OUT max_time float8,
+    OUT mean_time float8,
+    OUT stddev_time float8,
+    OUT rows int8,
+    OUT shared_blks_hit int8,
+    OUT shared_blks_read int8,
+    OUT shared_blks_dirtied int8,
+    OUT shared_blks_written int8,
+    OUT local_blks_hit int8,
+    OUT local_blks_read int8,
+    OUT local_blks_dirtied int8,
+    OUT local_blks_written int8,
+    OUT temp_blks_read int8,
+    OUT temp_blks_written int8,
+    OUT blk_read_time float8,
+    OUT blk_write_time float8,
+    OUT created timestamp with time zone,
+    OUT last_updated timestamp with time zone
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_stat_statements_1_5'
+LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
+
+-- Register a view on the function for ease of use.
+CREATE VIEW pg_stat_statements AS
+  SELECT * FROM pg_stat_statements(true);
+
+GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 3d9b8e4..a6a017b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -76,6 +76,7 @@
 #include "tcop/utility.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/timestamp.h"
 
 PG_MODULE_MAGIC;
 
@@ -117,7 +118,8 @@ typedef enum pgssVersion
 	PGSS_V1_0 = 0,
 	PGSS_V1_1,
 	PGSS_V1_2,
-	PGSS_V1_3
+	PGSS_V1_3,
+	PGSS_V1_5
 } pgssVersion;
 
 /*
@@ -156,6 +158,8 @@ typedef struct Counters
 	double		blk_read_time;	/* time spent reading, in msec */
 	double		blk_write_time; /* time spent writing, in msec */
 	double		usage;			/* usage factor */
+	TimestampTz		created;	/* timestamp of created time */
+	TimestampTz		last_updated;	/* timestamp of last updated */
 } Counters;
 
 /*
@@ -281,6 +285,7 @@ void		_PG_fini(void);
 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
 PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
 PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
+PG_FUNCTION_INFO_V1(pg_stat_statements_1_5);
 PG_FUNCTION_INFO_V1(pg_stat_statements);
 
 static void pgss_shmem_startup(void);
@@ -1251,6 +1256,7 @@ pgss_store(const char *query, uint32 queryId,
 		e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
 		e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
 		e->counters.usage += USAGE_EXEC(total_time);
+		e->counters.last_updated = GetCurrentTimestamp();
 
 		SpinLockRelease(&e->mutex);
 	}
@@ -1282,7 +1288,8 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS)
 #define PG_STAT_STATEMENTS_COLS_V1_1	18
 #define PG_STAT_STATEMENTS_COLS_V1_2	19
 #define PG_STAT_STATEMENTS_COLS_V1_3	23
-#define PG_STAT_STATEMENTS_COLS			23		/* maximum of above */
+#define PG_STAT_STATEMENTS_COLS_V1_5	25
+#define PG_STAT_STATEMENTS_COLS			25		/* maximum of above */
 
 /*
  * Retrieve statement statistics.
@@ -1295,6 +1302,16 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS)
  * function.  Unfortunately we weren't bright enough to do that for 1.1.
  */
 Datum
+pg_stat_statements_1_5(PG_FUNCTION_ARGS)
+{
+	bool		showtext = PG_GETARG_BOOL(0);
+
+	pg_stat_statements_internal(fcinfo, PGSS_V1_5, showtext);
+
+	return (Datum) 0;
+}
+
+Datum
 pg_stat_statements_1_3(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
@@ -1397,6 +1414,10 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 			if (api_version != PGSS_V1_3)
 				elog(ERROR, "incorrect number of output arguments");
 			break;
+		case PG_STAT_STATEMENTS_COLS_V1_5:
+			if (api_version != PGSS_V1_5)
+				elog(ERROR, "incorrect number of output arguments");
+			break;
 		default:
 			elog(ERROR, "incorrect number of output arguments");
 	}
@@ -1593,11 +1614,17 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 			values[i++] = Float8GetDatumFast(tmp.blk_read_time);
 			values[i++] = Float8GetDatumFast(tmp.blk_write_time);
 		}
+		if (api_version >= PGSS_V1_5)
+		{
+			values[i++] = Int64GetDatumFast(tmp.created);
+			values[i++] = Int64GetDatumFast(tmp.last_updated);
+		}
 
 		Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
 					 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
 					 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
 					 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
+					 api_version == PGSS_V1_5 ? PG_STAT_STATEMENTS_COLS_V1_5 :
 					 -1 /* fail if you forget to update this assert */ ));
 
 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
@@ -1663,6 +1690,8 @@ entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
 
 		/* reset the statistics */
 		memset(&entry->counters, 0, sizeof(Counters));
+		/* set the created timestamp */
+    entry->counters.created = GetCurrentTimestamp();
 		/* set the appropriate initial usage count */
 		entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
 		/* re-initialize the mutex each time ... we assume no one using it */
diff --git a/contrib/pg_stat_statements/pg_stat_statements.control b/contrib/pg_stat_statements/pg_stat_statements.control
index 24038f5..193fcdf 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.control
+++ b/contrib/pg_stat_statements/pg_stat_statements.control
@@ -1,5 +1,5 @@
 # pg_stat_statements extension
 comment = 'track execution statistics of all SQL statements executed'
-default_version = '1.4'
+default_version = '1.5'
 module_pathname = '$libdir/pg_stat_statements'
 relocatable = true
-- 
2.7.4 (Apple Git-66)

#2Peter Geoghegan
pg@heroku.com
In reply to: Jun Cheol Gim (#1)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

On Sun, Jul 17, 2016 at 12:22 AM, Jun Cheol Gim <dialogbox@gmail.com> wrote:

If we have timestamp of first and last executed, we can easily gather thess
informations and there are tons of more use cases.

-1 from me.

I think that this is the job of a tool that aggregates things from
pg_stat_statements. It's unfortunate that there isn't a good
third-party tool that does that, but there is nothing that prevents
it.

--
Peter Geoghegan

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Peter Geoghegan (#2)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

Peter Geoghegan <pg@heroku.com> writes:

On Sun, Jul 17, 2016 at 12:22 AM, Jun Cheol Gim <dialogbox@gmail.com> wrote:

If we have timestamp of first and last executed, we can easily gather thess
informations and there are tons of more use cases.

-1 from me.

I think that this is the job of a tool that aggregates things from
pg_stat_statements. It's unfortunate that there isn't a good
third-party tool that does that, but there is nothing that prevents
it.

The concern I've got about this proposal is that the results get very
questionable as soon as we start dropping statement entries for lack
of space. last_executed would be okay, perhaps, but first_executed
not so much.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Peter Geoghegan
pg@heroku.com
In reply to: Tom Lane (#3)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

On Sun, Jul 17, 2016 at 4:15 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

The concern I've got about this proposal is that the results get very
questionable as soon as we start dropping statement entries for lack
of space. last_executed would be okay, perhaps, but first_executed
not so much.

Agreed.

Also, for what it's worth, I should point out to Jun that
GetCurrentTimestamp() should definitely not be called when a spinlock
is held like that.

--
Peter Geoghegan

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#5Julien Rouhaud
julien.rouhaud@dalibo.com
In reply to: Peter Geoghegan (#2)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

On 18/07/2016 01:06, Peter Geoghegan wrote:

On Sun, Jul 17, 2016 at 12:22 AM, Jun Cheol Gim <dialogbox@gmail.com> wrote:

If we have timestamp of first and last executed, we can easily gather thess
informations and there are tons of more use cases.

-1 from me.

I think that this is the job of a tool that aggregates things from
pg_stat_statements. It's unfortunate that there isn't a good
third-party tool that does that, but there is nothing that prevents
it.

FWIW there's https://github.com/dalibo/powa-archivist which does that,
assuming you're using a 9.4+ server.

--
Julien Rouhaud
http://dalibo.com - http://dalibo.org

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#6Jason Kim
dialogbox@gmail.com
In reply to: Peter Geoghegan (#4)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

Hi guys,
Thank you for feedbacks.

I think that this is the job of a tool that aggregates things from
pg_stat_statements. It's unfortunate that there isn't a good
third-party tool that does that, but there is nothing that prevents
it.

Right. We can do this if we aggregate it frequently enough. However,
third-parties can do it better if we have more informations.
I think these are fundamental informations to strong third-parties could be.

The concern I've got about this proposal is that the results get very
questionable as soon as we start dropping statement entries for lack
of space. last_executed would be okay, perhaps, but first_executed
not so much.

If we set pg_stat_statements.max to large enough, there could be long
lived entries and short lived ones simultaneously. In this case, per call
statistics could be accurate but per seconds stats can not.
The idea of I named it as created and last_updated (not
first_executed and last_executed) was this. It represents timestamp of
stats are created and updated, so we can calculate per second stats with
simple math.

Also, for what it's worth, I should point out to Jun that
GetCurrentTimestamp() should definitely not be called when a spinlock
is held like that.

I moved it outside of spinlock.

@@ -1204,6 +1209,11 @@ pgss_store(const char *query, uint32 queryId,
*/
volatile pgssEntry *e = (volatile pgssEntry *) entry;

+   /*
+    * Read a timestamp before grab the spinlock
+    */
+   TimestampTz last_updated = GetCurrentTimestamp();
+
    SpinLockAcquire(&e->mutex);
    /* "Unstick" entry if it was previously sticky */
@@ -1251,6 +1261,7 @@ pgss_store(const char *query, uint32 queryId,
    e->counters.blk_read_time +=
INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
    e->counters.blk_write_time +=
INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
    e->counters.usage += USAGE_EXEC(total_time);
+   e->counters.last_updated = last_updated;

SpinLockRelease(&e->mutex);
}

pg_stat_statements_with_timestamp_v2.patch
<http://postgresql.nabble.com/file/n5912461/pg_stat_statements_with_timestamp_v2.patch&gt;

Regards,
Jason Kim.

--
View this message in context: http://postgresql.nabble.com/PROPOSAL-timestamp-informations-to-pg-stat-statements-tp5912306p5912461.html
Sent from the PostgreSQL - hackers mailing list archive at Nabble.com.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#7Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#3)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

On Sun, Jul 17, 2016 at 7:15 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Peter Geoghegan <pg@heroku.com> writes:

On Sun, Jul 17, 2016 at 12:22 AM, Jun Cheol Gim <dialogbox@gmail.com> wrote:

If we have timestamp of first and last executed, we can easily gather thess
informations and there are tons of more use cases.

-1 from me.

I think that this is the job of a tool that aggregates things from
pg_stat_statements. It's unfortunate that there isn't a good
third-party tool that does that, but there is nothing that prevents
it.

The concern I've got about this proposal is that the results get very
questionable as soon as we start dropping statement entries for lack
of space. last_executed would be okay, perhaps, but first_executed
not so much.

ISTM that last_executed is useful - you can then see for yourself
which of the statements that you see in the pg_stat_statements output
have been issued recently, and which are older. I mean, you could
also do that, as Peter says, with an additional tool that takes
periodic snapshots of the data and then figures out an approximate
last_executed time, but if you had this, you wouldn't need an
additional tool, at least not for simple cases. Better yet, the data
would be more exact. I dunno what's not to like about that, unless
we're worried that tracking it will incur too much overhead.

first_executed doesn't seem as useful as last_executed, but it isn't
useless either. It can't properly be read as the first time that
statement was ever executed, but it can be properly read as the amount
of time that has passed during which that statement has been executed
frequently enough to stay in the hash table, which is something that
someone might want to know.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#8legrand legrand
legrand_legrand@hotmail.com
In reply to: Robert Haas (#7)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

+1
Having the time of first occurence of a statement is very usefull for
trouble shouting,
it permits for exemple to retrieve the order of operations in some complex
cases (and thoses informations aren't taken by any third party collecting
tool, that will only be able to provide a time range of occurence).

I thougth that pgss rows where removed randomly when max rows was reached,
wouldn't having last_executed information permit a kind of LRU removal ?

Regards
PAscal

--
Sent from: http://www.postgresql-archive.org/PostgreSQL-hackers-f1928748.html

#9Tomas Vondra
tomas.vondra@2ndquadrant.com
In reply to: legrand legrand (#8)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

On 03/10/2018 04:43 PM, legrand legrand wrote:

+1
Having the time of first occurence of a statement is very usefull
for trouble shouting, it permits for exemple to retrieve the order of
operations in some complex cases (and thoses informations aren't
taken by any third party collecting tool, that will only be able to
provide a time range of occurence).

I really don't see how this would be useful in reconstructing order of
operations, particularly in complex cases where I'd expect the queries
to be executed repeatedly / interleaved in different ways. Furthermore,
it would only work for the very first execution of all statements, so
you would probably have to reset the stats over and over - which seems
to directly contradict the purpose of pg_stat_statements (aggregation of
data over longer periods of time).

So unfortunately this seems rather useless, and log_statements=all seems
like a much better / reliable approach to achieve that.

I also doubt it really allows computation of averages, e.g. queries per
second, because practical workloads usually have daily/weekly patterns
(and different queries may follow different patterns).

So I agree with Peter Geoghegan that tools regularly snapshotting
pg_stat_statements and processing that are a better approach. I've seen
a bunch of simple scripts doing just that, actually.

I thougth that pgss rows where removed randomly when max rows was
reached, wouldn't having last_executed information permit a kind of
LRU removal ?

No, the entries are not removed randomly. We track "usage" for each
entry (essentially +1 for each time the query got executed, with a decay
factor applied on each eviction (and we evict 5% at a time).

It's not immediately obvious why something based on time of the
first/last execution would be better than the current algorithm.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#10legrand legrand
legrand_legrand@hotmail.com
In reply to: Tomas Vondra (#9)
Re: [PROPOSAL] timestamp informations to pg_stat_statements

No, the entries are not removed randomly. We track "usage" for each
entry (essentially +1 for each time the query got executed, with a decay
factor applied on each eviction (and we evict 5% at a time).

OK I apologize, I hadn't looked in source code in details, and effectively
the "Usage" algorithm based on execution_time and calls will keep the
longest queries (and that's a good thing). In some cases (after an
application release or a postgres patch for exemple) we may be interested
in leastiests ones.

I'm still convinced that adding those kinds of informations
(with a planid as done in pg_stat_plans) would be an improvement.

Maybe because having used v$sql, first_load_time, last_active_time
and plan_hash_value for (too) many years).

It's always important to know when a new plan (good or bad) occurs, and pgss
could
give this kind of informations even without aggregation collector.

Last point (for developer), after testing this patch, in most cases when
calls=1: created and
last_updated values are identical, they don't even differ to reflect
execution (nor planing) duration. Is that a precision or coding problem ?

Regards
PAscal

--
Sent from: http://www.postgresql-archive.org/PostgreSQL-hackers-f1928748.html