Add session statistics to pg_stat_database
Here is a patch that adds the following to pg_stat_database:
- number of connections
- number of sessions that were not disconnected regularly
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.patchDownload
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Wed, 8 Jul 2020 13:12:42 +0200
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- number of connections
- number of sessions that were not disconnected regularly
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
---
doc/src/sgml/monitoring.sgml | 46 +++++++++
src/backend/catalog/system_views.sql | 5 +
src/backend/postmaster/pgstat.c | 138 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 +
src/backend/utils/adt/pgstatfuncs.c | 78 +++++++++++++++
src/include/catalog/pg_proc.dat | 20 ++++
src/include/pgstat.h | 29 +++++-
src/test/regress/expected/rules.out | 5 +
8 files changed, 324 insertions(+), 2 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index dfa9d0d641..da66808f02 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3519,6 +3519,52 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent in database sessions in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of connections established to this database.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that did not end
+ with a regular client disconnection.
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5314e9348f..64a4e5f0d4 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -909,6 +909,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(D.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index c022597bc0..7b62028358 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -247,6 +247,11 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static TimestampTz pgStatActiveStart = DT_NOBEGIN;
+static PgStat_Counter pgStatActiveTime = 0;
+static TimestampTz pgStatTransactionIdleStart = DT_NOBEGIN;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnected = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -326,6 +331,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool force);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -359,6 +365,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connection(PgStat_MsgConn *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
/* ------------------------------------------------------------
@@ -851,7 +858,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !force)
return;
/*
@@ -938,6 +945,10 @@ pgstat_report_stat(bool force)
/* Now, send function statistics */
pgstat_send_funcstats();
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(force);
+
/* Finally send SLRU statistics */
pgstat_send_slru();
}
@@ -1327,6 +1338,54 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "force" will be true when the session ends,
+ * so we report total session time and count only if it is true.
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool force)
+{
+ PgStat_MsgConn msg;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ if (force)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(MyStartTimestamp,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ msg.m_count = 1;
+ msg.m_session_time = secs * 1000000 + usecs;
+ msg.m_aborted = pgStatSessionDisconnected ? 0 : 1;
+ }
+ else
+ {
+ msg.m_count = 0;
+ msg.m_session_time = 0;
+ msg.m_aborted = 0;
+ }
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3197,6 +3256,54 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
PGSTAT_END_WRITE_ACTIVITY(beentry);
+
+ /*
+ * If the state has changed to "active" or "idle in transaction",
+ * record the start time.
+ * If the state has changed away from these, calculate the duration.
+ */
+ if (state == STATE_RUNNING)
+ {
+ if (pgStatActiveStart == DT_NOBEGIN)
+ pgStatActiveStart = current_timestamp;
+ }
+ else
+ {
+ if (pgStatActiveStart != DT_NOBEGIN)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(pgStatActiveStart,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ pgStatActiveTime += secs * 1000000 + usecs;
+ pgStatActiveStart = DT_NOBEGIN;
+ }
+ }
+
+ if (state == STATE_IDLEINTRANSACTION ||
+ state == STATE_IDLEINTRANSACTION_ABORTED)
+ {
+ if (pgStatTransactionIdleStart == DT_NOBEGIN)
+ pgStatTransactionIdleStart = current_timestamp;
+ }
+ else
+ {
+ if (pgStatTransactionIdleStart != DT_NOBEGIN)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(pgStatTransactionIdleStart,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ pgStatTransactionIdleStart = DT_NOBEGIN;
+ }
+ }
}
/*-----------
@@ -4665,6 +4772,10 @@ PgstatCollectorMain(int argc, char *argv[])
len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connection(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4743,6 +4854,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_connections = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6494,6 +6610,26 @@ pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len)
dbentry->last_checksum_failure = msg->m_failure_time;
}
+/* ----------
+ * pgstat_recv_connection() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connection(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_connections += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c9424f167c..79d3d236d7 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4504,6 +4504,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnected = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 2aff739466..d1cee7ecee 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1623,6 +1623,84 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_connections(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_connections);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 38295aca48..ac3f9b87d0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5416,6 +5416,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: number of connections',
+ proname => 'pg_stat_get_db_connections', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_connections' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 1387201382..ff2f6ac17e 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -67,7 +67,8 @@ typedef enum StatMsgType
PGSTAT_MTYPE_RECOVERYCONFLICT,
PGSTAT_MTYPE_TEMPFILE,
PGSTAT_MTYPE_DEADLOCK,
- PGSTAT_MTYPE_CHECKSUMFAILURE
+ PGSTAT_MTYPE_CHECKSUMFAILURE,
+ PGSTAT_MTYPE_CONNECTION
} StatMsgType;
/* ----------
@@ -574,6 +575,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -603,6 +619,7 @@ typedef union PgStat_Msg
PgStat_MsgDeadlock msg_deadlock;
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -645,6 +662,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_connections;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1266,6 +1288,11 @@ extern PgStat_MsgBgWriter BgWriterStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnected;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index b813e32215..6a725692a2 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1836,6 +1836,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(d.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.21.3
On Wed, Jul 8, 2020 at 4:17 PM Laurenz Albe <laurenz.albe@cybertec.at>
wrote:
Here is a patch that adds the following to pg_stat_database:
- number of connections
Is it expected behaviour to not count idle connections? The connection is
included after it is aborted but not while it was idle.
- number of sessions that were not disconnected regularly
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction timeThis is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.Yours,
Laurenz Albe
--
Highgo Software (Canada/China/Pakistan)
URL : http://www.highgo.ca
ADDR: 10318 WHALLEY BLVD, Surrey, BC
EMAIL: mailto: ahsan.hadi@highgo.ca
On Thu, 2020-07-23 at 18:16 +0500, Ahsan Hadi wrote:
On Wed, Jul 8, 2020 at 4:17 PM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
Here is a patch that adds the following to pg_stat_database:
- number of connectionsIs it expected behaviour to not count idle connections? The connection is included after it is aborted but not while it was idle.
Thanks for looking.
Currently, the patch counts connections when they close.
I could change the behavior that they are counted immediately.
Yours,
Laurenz Albe
On Tue, 2020-08-11 at 13:53 +0200, I wrote:
On Thu, 2020-07-23 at 18:16 +0500, Ahsan Hadi wrote:
On Wed, Jul 8, 2020 at 4:17 PM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
Here is a patch that adds the following to pg_stat_database:
- number of connectionsIs it expected behaviour to not count idle connections? The connection is included after it is aborted but not while it was idle.
Currently, the patch counts connections when they close.
I could change the behavior that they are counted immediately.
I have changed the code so that connections are counted immediately.
Attached is a new version.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v2.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v2.patchDownload
From 6d9bfbd682a9f4723f030fdc461f731175f55f44 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Fri, 4 Sep 2020 17:30:24 +0200
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- number of connections
- number of sessions that were not disconnected regularly
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
---
doc/src/sgml/monitoring.sgml | 46 +++++++++
src/backend/catalog/system_views.sql | 5 +
src/backend/postmaster/pgstat.c | 146 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 +
src/backend/utils/adt/pgstatfuncs.c | 78 ++++++++++++++
src/include/catalog/pg_proc.dat | 20 ++++
src/include/pgstat.h | 29 +++++-
src/test/regress/expected/rules.out | 5 +
8 files changed, 332 insertions(+), 2 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 673a0e73e4..aa5e22d213 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3514,6 +3514,52 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent in database sessions in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of connections established to this database.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that did not end
+ with a regular client disconnection.
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ed4f3f142d..d8b28c7600 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -912,6 +912,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(D.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 5f4b168fd1..12a7543554 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -247,6 +247,12 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static TimestampTz pgStatActiveStart = DT_NOBEGIN;
+static PgStat_Counter pgStatActiveTime = 0;
+static TimestampTz pgStatTransactionIdleStart = DT_NOBEGIN;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+static bool pgStatSessionReported = false;
+bool pgStatSessionDisconnected = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -326,6 +332,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool force);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -359,6 +366,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connection(PgStat_MsgConn *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
/* ------------------------------------------------------------
@@ -851,7 +859,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !force)
return;
/*
@@ -938,6 +946,10 @@ pgstat_report_stat(bool force)
/* Now, send function statistics */
pgstat_send_funcstats();
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(force);
+
/* Finally send SLRU statistics */
pgstat_send_slru();
}
@@ -1327,6 +1339,61 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "force" will be true when the session ends,
+ * so we report total session time only if it is true.
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool force)
+{
+ PgStat_MsgConn msg;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ if (force)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(MyStartTimestamp,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ msg.m_session_time = secs * 1000000 + usecs;
+ msg.m_aborted = pgStatSessionDisconnected ? 0 : 1;
+ }
+ else
+ {
+ msg.m_session_time = 0;
+ msg.m_aborted = 0;
+ }
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ if (pgStatSessionReported)
+ msg.m_count = 0;
+ else
+ {
+ msg.m_count = 1;
+ pgStatSessionReported = true;
+ }
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3197,6 +3264,54 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
PGSTAT_END_WRITE_ACTIVITY(beentry);
+
+ /*
+ * If the state has changed to "active" or "idle in transaction",
+ * record the start time.
+ * If the state has changed away from these, calculate the duration.
+ */
+ if (state == STATE_RUNNING)
+ {
+ if (pgStatActiveStart == DT_NOBEGIN)
+ pgStatActiveStart = current_timestamp;
+ }
+ else
+ {
+ if (pgStatActiveStart != DT_NOBEGIN)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(pgStatActiveStart,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ pgStatActiveTime += secs * 1000000 + usecs;
+ pgStatActiveStart = DT_NOBEGIN;
+ }
+ }
+
+ if (state == STATE_IDLEINTRANSACTION ||
+ state == STATE_IDLEINTRANSACTION_ABORTED)
+ {
+ if (pgStatTransactionIdleStart == DT_NOBEGIN)
+ pgStatTransactionIdleStart = current_timestamp;
+ }
+ else
+ {
+ if (pgStatTransactionIdleStart != DT_NOBEGIN)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(pgStatTransactionIdleStart,
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ pgStatTransactionIdleStart = DT_NOBEGIN;
+ }
+ }
}
/*-----------
@@ -4688,6 +4803,10 @@ PgstatCollectorMain(int argc, char *argv[])
len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connection(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4762,6 +4881,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_connections = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6513,6 +6637,26 @@ pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len)
dbentry->last_checksum_failure = msg->m_failure_time;
}
+/* ----------
+ * pgstat_recv_connection() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connection(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_connections += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c9424f167c..79d3d236d7 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4504,6 +4504,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnected = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 95738a4e34..02c659d196 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,84 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_connections(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_connections);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 687509ba92..4724755dd0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5425,6 +5425,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: number of connections',
+ proname => 'pg_stat_get_db_connections', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_connections' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 0dfbac46b4..91b32ced48 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -67,7 +67,8 @@ typedef enum StatMsgType
PGSTAT_MTYPE_RECOVERYCONFLICT,
PGSTAT_MTYPE_TEMPFILE,
PGSTAT_MTYPE_DEADLOCK,
- PGSTAT_MTYPE_CHECKSUMFAILURE
+ PGSTAT_MTYPE_CHECKSUMFAILURE,
+ PGSTAT_MTYPE_CONNECTION
} StatMsgType;
/* ----------
@@ -574,6 +575,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -603,6 +619,7 @@ typedef union PgStat_Msg
PgStat_MsgDeadlock msg_deadlock;
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -645,6 +662,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_connections;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1271,6 +1293,11 @@ extern PgStat_MsgBgWriter BgWriterStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnected;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 2a18dc423e..7b07442d30 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(d.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
Hello Laurenz,
Thanks for submitting this! Please find my feedback below.
* Are we trying to capture ONLY client initiated disconnects in
m_aborted (we are not handling other disconnects by not accounting for
EOF..like if psql was killed)? If yes, why?
* pgstat_send_connstats(): How about renaming the "force" argument to
"disconnected"?
*
static TimestampTz pgStatActiveStart = DT_NOBEGIN;
static PgStat_Counter pgStatActiveTime = 0;
static TimestampTz pgStatTransactionIdleStart = DT_NOBEGIN;
static PgStat_Counter pgStatTransactionIdleTime = 0;
static bool pgStatSessionReported = false;
bool pgStatSessionDisconnected = false;
I think we can house all of these globals inside PgBackendStatus and can
follow the protocol for reading/writing fields in PgBackendStatus.
Refer: PGSTAT_{BEGIN|END}_WRITE_ACTIVITY
Also, some of these fields are not required:
I don't think we need pgStatActiveStart and pgStatTransactionIdleStart -
instead of these two we could use
PgBackendStatus.st_state_start_timestamp which marks the beginning TS of
the backend's current state (st_state). We can look at that field along
with the current and to-be-transitioned-to states inside
pgstat_report_activity() when there is a transition away from
STATE_RUNNING, STATE_IDLEINTRANSACTION or
STATE_IDLEINTRANSACTION_ABORTED, in order to update pgStatActiveTime and
pgStatTransactionIdleTime. We would also need to update those counters
on disconnect/PGSTAT_STAT_INTERVAL timeout if the backend's current
state was STATE_RUNNING, STATE_IDLEINTRANSACTION or
STATE_IDLEINTRANSACTION_ABORTED (in pgstat_send_connstats())
pgStatSessionDisconnected is not required as it can be determined if a
session has been disconnected by looking at the force argument to
pgstat_report_stat() [unless we would want to distinguish between
client-initiated disconnects, which I am not sure why, as I have
brought up above].
pgStatSessionReported is not required. We can glean this information by
checking if the function local static last_report in
pgstat_report_stat() is 0 and passing this on as another param
"first_report" to pgstat_send_connstats().
* PGSTAT_FILE_FORMAT_ID needs to be updated when a stats collector data
structure changes and we had a change in PgStat_StatDBEntry.
* We can directly use PgBackendStatus.st_proc_start_timestamp for
calculating m_session_time. We can also choose to report session uptime
even when the report is for the not-disconnect case
(PGSTAT_STAT_INTERVAL elapsed). No reason why not. Then we would need to
pass in the value of last_report to pgstat_send_connstats() -> calculate
m_session_time to be number of time units from
PgBackendStatus.st_proc_start_timestamp for the first report and then
number of time units from the last_report for all subsequent reports.
* We would need to bump the catalog version since we have made
changes to system views. Refer: #define CATALOG_VERSION_NO
Regards,
Soumyadeep (VMware)
On Thu, 2020-09-24 at 14:38 -0700, Soumyadeep Chakraborty wrote:
Thanks for submitting this! Please find my feedback below.
Thanks for the thorough review.
Before I update the patch, I have a few comments and questions.
* Are we trying to capture ONLY client initiated disconnects in
m_aborted (we are not handling other disconnects by not accounting for
EOF..like if psql was killed)? If yes, why?
I thought it was interesting to know how many database sessions are
ended regularly as opposed to ones that get killed or end by unexpected
client death.
* pgstat_send_connstats(): How about renaming the "force" argument to
"disconnected"?
Yes, that might be better. I'll do that.
*
static TimestampTz pgStatActiveStart = DT_NOBEGIN;
static PgStat_Counter pgStatActiveTime = 0;
static TimestampTz pgStatTransactionIdleStart = DT_NOBEGIN;
static PgStat_Counter pgStatTransactionIdleTime = 0;
static bool pgStatSessionReported = false;
bool pgStatSessionDisconnected = false;I think we can house all of these globals inside PgBackendStatus and can
follow the protocol for reading/writing fields in PgBackendStatus.
Refer: PGSTAT_{BEGIN|END}_WRITE_ACTIVITY
Are you sure that is the right way to go?
Correct me if I am wrong, but isn't PgBackendStatus for relevant status
information that other processes can access?
I'd assume that it is not the correct place to store backend-private data
that are not relevant to others.
Besides, if data is written to this structure more often, readers would
have deal with more contention, which could affect performance.
But I agree with the following:
Also, some of these fields are not required:
I don't think we need pgStatActiveStart and pgStatTransactionIdleStart -
instead of these two we could use
PgBackendStatus.st_state_start_timestamp which marks the beginning TS of
the backend's current state (st_state). We can look at that field along
with the current and to-be-transitioned-to states inside
pgstat_report_activity() when there is a transition away from
STATE_RUNNING, STATE_IDLEINTRANSACTION or
STATE_IDLEINTRANSACTION_ABORTED, in order to update pgStatActiveTime and
pgStatTransactionIdleTime. We would also need to update those counters
on disconnect/PGSTAT_STAT_INTERVAL timeout if the backend's current
state was STATE_RUNNING, STATE_IDLEINTRANSACTION or
STATE_IDLEINTRANSACTION_ABORTED (in pgstat_send_connstats())
Yes, that would be better.
pgStatSessionDisconnected is not required as it can be determined if a
session has been disconnected by looking at the force argument to
pgstat_report_stat() [unless we would want to distinguish between
client-initiated disconnects, which I am not sure why, as I have
brought up above].
But wouldn't that mean that we count *every* end of a session as regular
disconnection, even if the backend was killed?
I personally would want all my database connections to be closed by
the client, unless something unexpected happens.
pgStatSessionReported is not required. We can glean this information by
checking if the function local static last_report in
pgstat_report_stat() is 0 and passing this on as another param
"first_report" to pgstat_send_connstats().
Yes, that is better.
* PGSTAT_FILE_FORMAT_ID needs to be updated when a stats collector data
structure changes and we had a change in PgStat_StatDBEntry.
I think that should be left to the committer.
* We can directly use PgBackendStatus.st_proc_start_timestamp for
calculating m_session_time. We can also choose to report session uptime
even when the report is for the not-disconnect case
(PGSTAT_STAT_INTERVAL elapsed). No reason why not. Then we would need to
pass in the value of last_report to pgstat_send_connstats() -> calculate
m_session_time to be number of time units from
PgBackendStatus.st_proc_start_timestamp for the first report and then
number of time units from the last_report for all subsequent reports.
Yes, that would make for better statistics, since client connections
can last quite long.
* We would need to bump the catalog version since we have made
changes to system views. Refer: #define CATALOG_VERSION_NO
Again, I think that's up to the committer.
Thanks again!
Yours,
Laurenz Albe
On Tue, Sep 29, 2020 at 2:44 AM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
* Are we trying to capture ONLY client initiated disconnects in
m_aborted (we are not handling other disconnects by not accounting for
EOF..like if psql was killed)? If yes, why?I thought it was interesting to know how many database sessions are
ended regularly as opposed to ones that get killed or end by unexpected
client death.
It may very well be. It would also be interesting to find out how many
connections are still open on the database (something we could easily
glean if we had the number of all disconnects, client-initiated or
unnatural). Maybe we could have both?
m_sessions_disconnected;
m_sessions_killed;
*
static TimestampTz pgStatActiveStart = DT_NOBEGIN;
static PgStat_Counter pgStatActiveTime = 0;
static TimestampTz pgStatTransactionIdleStart = DT_NOBEGIN;
static PgStat_Counter pgStatTransactionIdleTime = 0;
static bool pgStatSessionReported = false;
bool pgStatSessionDisconnected = false;I think we can house all of these globals inside PgBackendStatus and can
follow the protocol for reading/writing fields in PgBackendStatus.
Refer: PGSTAT_{BEGIN|END}_WRITE_ACTIVITYAre you sure that is the right way to go?
Correct me if I am wrong, but isn't PgBackendStatus for relevant status
information that other processes can access?
I'd assume that it is not the correct place to store backend-private data
that are not relevant to others.
Besides, if data is written to this structure more often, readers would
have deal with more contention, which could affect performance.
You are absolutely right! PgBackendStatus is not the place for any of
these fields. We could place them in LocalPgBackendStatus perhaps. But
I don't feel too strongly about that now, having looked at similar fields
such as pgStatXactCommit, pgStatXactRollback etc. If we decide to stick
with the globals, let's isolate and decorate them with a comment such as
this example from the source:
/*
* Updated by pgstat_count_buffer_*_time macros
*/
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
pgStatSessionDisconnected is not required as it can be determined if a
session has been disconnected by looking at the force argument to
pgstat_report_stat() [unless we would want to distinguish between
client-initiated disconnects, which I am not sure why, as I have
brought up above].But wouldn't that mean that we count *every* end of a session as regular
disconnection, even if the backend was killed?
See my comment above about client-initiated and unnatural disconnects.
* PGSTAT_FILE_FORMAT_ID needs to be updated when a stats collector data
structure changes and we had a change in PgStat_StatDBEntry.I think that should be left to the committer.
Fair.
* We would need to bump the catalog version since we have made
changes to system views. Refer: #define CATALOG_VERSION_NOAgain, I think that's up to the committer.
Fair.
Regards,
Soumyadeep (VMware)
On 2020-09-05 00:50, Laurenz Albe wrote:
I have changed the code so that connections are counted immediately.
Attached is a new version.
Thanks for making a patch.
I'm interested in this feature.
I think to add the number of login failures is good for security.
Although we can see the event from log files, it's useful to know the
overview
if the database may be attached or not.
By the way, could you rebase the patch since the latest patches
failed to be applied to the master branch?
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
On Fri, 2020-10-02 at 15:10 -0700, Soumyadeep Chakraborty wrote:
On Tue, Sep 29, 2020 at 2:44 AM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
* Are we trying to capture ONLY client initiated disconnects in
m_aborted (we are not handling other disconnects by not accounting for
EOF..like if psql was killed)? If yes, why?I thought it was interesting to know how many database sessions are
ended regularly as opposed to ones that get killed or end by unexpected
client death.It may very well be. It would also be interesting to find out how many
connections are still open on the database (something we could easily
glean if we had the number of all disconnects, client-initiated or
unnatural). Maybe we could have both?m_sessions_disconnected;
m_sessions_killed;
We already have "numbackends" in "pg_stat_database", so we know the number
of active connections, right?
You are absolutely right! PgBackendStatus is not the place for any of
these fields. We could place them in LocalPgBackendStatus perhaps. But
I don't feel too strongly about that now, having looked at similar fields
such as pgStatXactCommit, pgStatXactRollback etc. If we decide to stick
with the globals, let's isolate and decorate them with a comment such as
this example from the source:/*
* Updated by pgstat_count_buffer_*_time macros
*/
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
I have reduced the number of variables with my latest patch; I think
the rewrite based on your review is definitely an improvement.
The comment you quote is from "pgstat.h", and my only global variable
has a comment there.
pgStatSessionDisconnected is not required as it can be determined if a
session has been disconnected by looking at the force argument to
pgstat_report_stat() [unless we would want to distinguish between
client-initiated disconnects, which I am not sure why, as I have
brought up above].But wouldn't that mean that we count *every* end of a session as regular
disconnection, even if the backend was killed?See my comment above about client-initiated and unnatural disconnects.
I decided to leave the functionality as it is; I think it is interesting
information to know if your clients disconnect cleanly or not.
Masahiro Ikeda wrote:
I think to add the number of login failures is good for security.
Although we can see the event from log files, it's useful to know the
overview if the database may be attached or not.
I don't think login failures can be reasonably reported in
"pg_stat_database", since authentication happens before the session is
attached to a database.
What if somebody attempts to connect to a non-existing database?
I agree that this is interesting information, but I don't think it
belongs into this patch.
By the way, could you rebase the patch since the latest patches
failed to be applied to the master branch?
Yes, the patch has bit-rotted.
Attached is v3 with improvements.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v3.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v3.patchDownload
From 0cc86e8a2bf3ffc76358c9022636502779c30910 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Tue, 13 Oct 2020 13:26:48 +0200
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- number of connections
- number of sessions that were not disconnected regularly
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Masahiro Ikeda
---
doc/src/sgml/monitoring.sgml | 46 ++++++++++++
src/backend/catalog/system_views.sql | 5 ++
src/backend/postmaster/pgstat.c | 105 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 ++
src/backend/utils/adt/pgstatfuncs.c | 78 ++++++++++++++++++++
src/include/catalog/pg_proc.dat | 20 +++++
src/include/pgstat.h | 27 +++++++
src/test/regress/expected/rules.out | 5 ++
8 files changed, 290 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 66566765f0..13ef586857 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3415,6 +3415,52 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent in database sessions in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of connections established to this database.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that did not end
+ with a regular client disconnection.
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index c29390760f..8db677948c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -921,6 +921,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(D.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 822f0ebc62..37822fe49a 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -249,6 +249,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnected = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -334,6 +337,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -369,6 +373,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connection(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -862,7 +867,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !force)
return;
/*
@@ -873,6 +878,11 @@ pgstat_report_stat(bool force)
if (!force &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(force, last_report);
+
last_report = now;
/*
@@ -1341,6 +1351,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the session ends.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 : 1;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3324,6 +3376,28 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4876,6 +4950,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connection(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4950,6 +5028,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_connections = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6895,6 +6978,26 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connection() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connection(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_connections += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 411cfadbff..888ffea1cd 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4498,6 +4498,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnected = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0d0d2e6d2b..76d237829e 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,84 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_connections(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_connections);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 22340baf1c..95f65eb95e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5430,6 +5430,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: number of connections',
+ proname => 'pg_stat_get_db_connections', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_connections' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a821ff4f15..97064cde05 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -71,6 +71,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -616,6 +617,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -648,6 +664,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -690,6 +707,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_connections;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1341,6 +1363,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnected;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index cf2a9b4408..a8b9539e17 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(d.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On Tue, Oct 13, 2020 at 01:44:41PM +0200, Laurenz Albe wrote:
Attached is v3 with improvements.
+ <para>
+ Time spent in database sessions in this database, in milliseconds.
+ </para></entry>
Should say "Total time spent *by* DB sessions..." ?
I think these counters are only accurate as of the last state change, right?
So a session which has been idle for 1hr, that 1hr is not included. I think
the documentation should explain that, or (ideally) the implementation would be
more precise. Maybe the timestamps should only be updated after a session
terminates (and the docs should say so).
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of connections established to this database.
*Total* number of connections established, otherwise it sounds like it might
mean "the number of sessions [currently] established".
+ Number of database sessions to this database that did not end
+ with a regular client disconnection.
Does that mean "sessions which ended irregularly" ? Or does it also include
"sessions which have not ended" ?
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 : 1;
I think this can be just:
msg.m_aborted = (bool) (disconnect && !pgStatSessionDisconnected);
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_session_time) / 1000.0;
I think these can say:
|double result = 0;
|if ((dbentry=..) != NULL)
| result = (double) ..;
That not only uses fewer LOC, but also the assignment to zero is (known to be)
done at compile time (BSS) rather than runtime.
Thanks for the --- as always --- valuable review!
On Tue, 2020-10-13 at 17:55 -0500, Justin Pryzby wrote:
On Tue, Oct 13, 2020 at 01:44:41PM +0200, Laurenz Albe wrote:
Attached is v3 with improvements.
+ <para> + Time spent in database sessions in this database, in milliseconds. + </para></entry>Should say "Total time spent *by* DB sessions..." ?
That is indeed better. Fixed.
I think these counters are only accurate as of the last state change, right?
So a session which has been idle for 1hr, that 1hr is not included. I think
the documentation should explain that, or (ideally) the implementation would be
more precise. Maybe the timestamps should only be updated after a session
terminates (and the docs should say so).
I agree, and I have added an explanation that the value doesn't include
the duration of the current state.
Of course it would be nice to have totally accurate values, but I think
that the statistics are by nature inaccurate (datagrams can get lost),
and more frequent statistics updates increase the work load.
I don't think that is worth the effort.
+ <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>connections</structfield> <type>bigint</type> + </para> + <para> + Number of connections established to this database.*Total* number of connections established, otherwise it sounds like it might
mean "the number of sessions [currently] established".
Fixed like that.
+ Number of database sessions to this database that did not end + with a regular client disconnection.Does that mean "sessions which ended irregularly" ? Or does it also include
"sessions which have not ended" ?
I have added an explanation for that.
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 : 1;
I think this can be just:
msg.m_aborted = (bool) (disconnect && !pgStatSessionDisconnected);
I mulled over this and finally decided to leave it as it is.
Since "m_aborted" gets added to the total counter, I'd prefer to
have it be an "int".
Your proposed code works (the cast is actually not necessary, right?).
But I think that my version is more readable if you think of
"m_aborted" as a counter rather than a flag.
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = ((double) dbentry->n_session_time) / 1000.0;I think these can say:
double result = 0;
if ((dbentry=..) != NULL)
result = (double) ..;That not only uses fewer LOC, but also the assignment to zero is (known to be)
done at compile time (BSS) rather than runtime.
I didn't know about the performance difference.
Concise code (if readable) is good, so I changed the code like you propose.
The code pattern is actually copied from neighboring functions,
which then should also be changed like this, but that is outside
the scope of this patch.
Attached is v4 of the patch.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v4.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v4.patchDownload
From 9e8bf3efd984306c73243736d0b4a4023cdd5f3a Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Wed, 14 Oct 2020 11:08:20 +0200
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended other than with a client disconnect
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda
(This requires a catversion bump, as well as an update to
PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 49 +++++++++++++
src/backend/catalog/system_views.sql | 5 ++
src/backend/postmaster/pgstat.c | 105 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 ++
src/backend/utils/adt/pgstatfuncs.c | 68 +++++++++++++++++
src/include/catalog/pg_proc.dat | 20 +++++
src/include/pgstat.h | 27 +++++++
src/test/regress/expected/rules.out | 5 ++
8 files changed, 283 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 66566765f0..a50fc025d5 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3663,6 +3663,55 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of connections established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by something else than a regular client disconnection
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index c29390760f..8db677948c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -921,6 +921,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(D.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 822f0ebc62..37822fe49a 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -249,6 +249,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnected = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -334,6 +337,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -369,6 +373,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connection(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -862,7 +867,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !force)
return;
/*
@@ -873,6 +878,11 @@ pgstat_report_stat(bool force)
if (!force &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(force, last_report);
+
last_report = now;
/*
@@ -1341,6 +1351,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the session ends.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 : 1;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3324,6 +3376,28 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4876,6 +4950,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connection(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4950,6 +5028,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_connections = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6895,6 +6978,26 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connection() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connection(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_connections += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 411cfadbff..888ffea1cd 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4498,6 +4498,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnected = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0d0d2e6d2b..657ab19492 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,74 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_connections(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_connections);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 22340baf1c..95f65eb95e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5430,6 +5430,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: number of connections',
+ proname => 'pg_stat_get_db_connections', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_connections' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a821ff4f15..97064cde05 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -71,6 +71,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -616,6 +617,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -648,6 +664,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -690,6 +707,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_connections;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1341,6 +1363,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnected;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index cf2a9b4408..a8b9539e17 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(d.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
Hi Laurenz,
I have applied the latest patch on master, all the regression test cases
are passing and the implemented functionality is also looking fine. The
point that I raised about idle connection not included is also addressed.
thanks,
Ahsan
On Wed, Oct 14, 2020 at 2:28 PM Laurenz Albe <laurenz.albe@cybertec.at>
wrote:
Thanks for the --- as always --- valuable review!
On Tue, 2020-10-13 at 17:55 -0500, Justin Pryzby wrote:
On Tue, Oct 13, 2020 at 01:44:41PM +0200, Laurenz Albe wrote:
Attached is v3 with improvements.
+ <para> + Time spent in database sessions in this database, inmilliseconds.
+ </para></entry>
Should say "Total time spent *by* DB sessions..." ?
That is indeed better. Fixed.
I think these counters are only accurate as of the last state change,
right?
So a session which has been idle for 1hr, that 1hr is not included. I
think
the documentation should explain that, or (ideally) the implementation
would be
more precise. Maybe the timestamps should only be updated after a
session
terminates (and the docs should say so).
I agree, and I have added an explanation that the value doesn't include
the duration of the current state.Of course it would be nice to have totally accurate values, but I think
that the statistics are by nature inaccurate (datagrams can get lost),
and more frequent statistics updates increase the work load.
I don't think that is worth the effort.+ <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>connections</structfield> <type>bigint</type> + </para> + <para> + Number of connections established to this database.*Total* number of connections established, otherwise it sounds like it
might
mean "the number of sessions [currently] established".
Fixed like that.
+ Number of database sessions to this database that did not end + with a regular client disconnection.Does that mean "sessions which ended irregularly" ? Or does it also
include
"sessions which have not ended" ?
I have added an explanation for that.
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 :
1;
I think this can be just:
msg.m_aborted = (bool) (disconnect && !pgStatSessionDisconnected);I mulled over this and finally decided to leave it as it is.
Since "m_aborted" gets added to the total counter, I'd prefer to
have it be an "int".Your proposed code works (the cast is actually not necessary, right?).
But I think that my version is more readable if you think of
"m_aborted" as a counter rather than a flag.+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = ((double) dbentry->n_session_time) / 1000.0;I think these can say:
double result = 0;
if ((dbentry=..) != NULL)
result = (double) ..;That not only uses fewer LOC, but also the assignment to zero is (known
to be)
done at compile time (BSS) rather than runtime.
I didn't know about the performance difference.
Concise code (if readable) is good, so I changed the code like you propose.The code pattern is actually copied from neighboring functions,
which then should also be changed like this, but that is outside
the scope of this patch.Attached is v4 of the patch.
Yours,
Laurenz Albe
--
Highgo Software (Canada/China/Pakistan)
URL : http://www.highgo.ca
ADDR: 10318 WHALLEY BLVD, Surrey, BC
EMAIL: mailto: ahsan.hadi@highgo.ca
Hi,
I noticed that the cfbot fails for this patch.
For this, I am setting the status to: 'Waiting on Author'.
Cheers,
//Georgios
The new status of this patch is: Waiting on Author
On Tue, 2020-11-10 at 15:03 +0000, Georgios Kokolatos wrote:
I noticed that the cfbot fails for this patch.
For this, I am setting the status to: 'Waiting on Author'.
Thanks for noticing, it was only the documentation build.
Version 5 attached, status changed back to "waiting for review".
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v5.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v5.patchDownload
From afc37856c12fd0a85587c638fca291a0b5652d9b Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Wed, 11 Nov 2020 20:14:28 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended other than with a client disconnect
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda
(This requires a catversion bump, as well as an update to
PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 49 +++++++++++++
src/backend/catalog/system_views.sql | 5 ++
src/backend/postmaster/pgstat.c | 105 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 ++
src/backend/utils/adt/pgstatfuncs.c | 68 +++++++++++++++++
src/include/catalog/pg_proc.dat | 20 +++++
src/include/pgstat.h | 27 +++++++
src/test/regress/expected/rules.out | 5 ++
8 files changed, 283 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 98e1995453..89610d1010 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3704,6 +3704,55 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>connections</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of connections established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by something else than a regular client disconnection
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 2e4aa1c4b6..998b4d542a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(D.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index e76e627c6b..9978aab60a 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -249,6 +249,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnected = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -334,6 +337,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -369,6 +373,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connection(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -862,7 +867,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !force)
return;
/*
@@ -873,6 +878,11 @@ pgstat_report_stat(bool force)
if (!force &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(force, last_report);
+
last_report = now;
/*
@@ -1341,6 +1351,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the session ends.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnected) ? 0 : 1;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3327,6 +3379,28 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4879,6 +4953,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connection(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4953,6 +5031,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_connections = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6903,6 +6986,26 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connection() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connection(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_connections += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7c5f7c775b..05b7832149 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4494,6 +4494,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnected = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a210fc93b4..4a889ee43d 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,74 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_connections(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_connections);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index c01da4bf01..f953dcecbf 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5441,6 +5441,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: number of connections',
+ proname => 'pg_stat_get_db_connections', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_connections' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 257e515bfe..06775282e9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -71,6 +71,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -619,6 +620,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -651,6 +667,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -693,6 +710,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_connections;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1347,6 +1369,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnected;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 097ff5d111..46ae183449 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_connections(d.oid) AS connections,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
I wrote:
On Tue, 2020-11-10 at 15:03 +0000, Georgios Kokolatos wrote:
I noticed that the cfbot fails for this patch.
For this, I am setting the status to: 'Waiting on Author'.
Thanks for noticing, it was only the documentation build.
Version 5 attached, status changed back to "waiting for review".
The patch is still failing, so I looked again:
make[3]: Entering directory '/home/travis/build/postgresql-cfbot/postgresql/doc/src/sgml'
{ \
echo "<!ENTITY version \"14devel\">"; \
echo "<!ENTITY majorversion \"14\">"; \
} > version.sgml
'/usr/bin/perl' ./mk_feature_tables.pl YES ../../../src/backend/catalog/sql_feature_packages.txt ../../../src/backend/catalog/sql_features.txt > features-supported.sgml
'/usr/bin/perl' ./mk_feature_tables.pl NO ../../../src/backend/catalog/sql_feature_packages.txt ../../../src/backend/catalog/sql_features.txt > features-unsupported.sgml
'/usr/bin/perl' ./generate-errcodes-table.pl ../../../src/backend/utils/errcodes.txt > errcodes-table.sgml
'/usr/bin/perl' ./generate-keywords-table.pl . > keywords-table.sgml
/usr/bin/xmllint --path . --noout --valid postgres.sgml
error : Unknown IO error
postgres.sgml:21: /usr/bin/bison -Wno-deprecated -d -o gram.c gram.y
warning: failed to load external entity "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"
]>
^
postgres.sgml:23: element book: validity error : No declaration for attribute id of element book
<book id="postgres">
^
postgres.sgml:24: element title: validity error : No declaration for element title
<title>PostgreSQL &version; Documentation</title>
I have the impression that this is not the fault of my patch, something seems to be
wrong with the cfbot.
I see that other patches are failing with the same error.
Yours,
Laurenz Albe
‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
On Thursday, November 12, 2020 9:31 AM, Laurenz Albe <laurenz.albe@cybertec.at> wrote:
I wrote:
On Tue, 2020-11-10 at 15:03 +0000, Georgios Kokolatos wrote:
I noticed that the cfbot fails for this patch.
For this, I am setting the status to: 'Waiting on Author'.Thanks for noticing, it was only the documentation build.
Version 5 attached, status changed back to "waiting for review".The patch is still failing, so I looked again:
make[3]: Entering directory '/home/travis/build/postgresql-cfbot/postgresql/doc/src/sgml'
{ \
echo "<!ENTITY version \"14devel\">"; \echo "<!ENTITY majorversion \\"14\\">"; \\
} > version.sgml
'/usr/bin/perl' ./mk_feature_tables.pl YES ../../../src/backend/catalog/sql_feature_packages.txt ../../../src/backend/catalog/sql_features.txt > features-supported.sgml
'/usr/bin/perl' ./mk_feature_tables.pl NO ../../../src/backend/catalog/sql_feature_packages.txt ../../../src/backend/catalog/sql_features.txt > features-unsupported.sgml
'/usr/bin/perl' ./generate-errcodes-table.pl ../../../src/backend/utils/errcodes.txt > errcodes-table.sgml
'/usr/bin/perl' ./generate-keywords-table.pl . > keywords-table.sgml
/usr/bin/xmllint --path . --noout --valid postgres.sgml
error : Unknown IO error
postgres.sgml:21: /usr/bin/bison -Wno-deprecated -d -o gram.c gram.y
warning: failed to load external entity "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"
]>^
postgres.sgml:23: element book: validity error : No declaration for attribute id of element book
<book id="postgres">^
postgres.sgml:24: element title: validity error : No declaration for element title
<title>PostgreSQL &version; Documentation</title>I have the impression that this is not the fault of my patch, something seems to be
wrong with the cfbot.I see that other patches are failing with the same error.
You are indeed correct. Unfortunately the cfbot is a bit unstable due
to some issues related to the documentation. I alerted a contributor
and he was quick to try to address the issue in pgsql-www [1]/messages/by-id/E2EE6B76-2D96-408A-B961-CAE47D1A86F0@yesql.se.
Thank you very much for looking and apologies for the chatter.
Yours,
Laurenz Albe
[1]: /messages/by-id/E2EE6B76-2D96-408A-B961-CAE47D1A86F0@yesql.se
On Fri, 2020-10-16 at 16:24 +0500, Ahsan Hadi wrote:
I have applied the latest patch on master, all the regression test cases are passing
and the implemented functionality is also looking fine. The point that I raised about
idle connection not included is also addressed.
If you think that the patch is ready to go, you could mark it as
"ready for committer" in the commitfest app.
Yours,
Laurenz Albe
On Tue, Nov 17, 2020 at 4:22 PM Laurenz Albe <laurenz.albe@cybertec.at>
wrote:
On Fri, 2020-10-16 at 16:24 +0500, Ahsan Hadi wrote:
I have applied the latest patch on master, all the regression test cases
are passing
and the implemented functionality is also looking fine. The point that
I raised about
idle connection not included is also addressed.
If you think that the patch is ready to go, you could mark it as
"ready for committer" in the commitfest app.
I've taken a look as well, and here are a few short notes:
* It talks about "number of connections" but "number of aborted sessions".
We should probably be consistent about talking either about connections or
sessions? In particular, connections seems wrong in this case, because it
only starts counting after authentication is complete (since otherwise we
send no stats)? (This goes for both docs and actual function names)
* Is there a reason we're counting active and idle in transaction
(including aborted), but not fastpath? In particular, we seem to ignore
fastpath -- if we don't want to single it out specifically, it should
probably be included in active?
* pgstat_send_connstat() but pgstat_recv_connection(). Let's call both
connstat or both connection (I'd vote connstat)?
* Is this actually a fix that's independent of the new stats? It seems in
general to be changing the behaviour of "force", which is more generic?
- !have_function_stats)
+ !have_function_stats && !force)
* in pgstat_send_connstat() you pass the parameter "force" in as
"disconnect". That behaviour at least requires a comment saying why, I
think. My understanding is it relies on that "force" means this is
a "backend is shutting down", but that is not actually documented anywhere.
Maybe the "force" parameter should actually be renamed to indicate this is
really what it means, to avoid a future mistake in the area? But even with
that, how does that turn into disconnect?
* Maybe rename pgStatSessionDisconnected
to pgStatSessionNormalDisconnected? To avoid having to go back to the
setting point and look it up in a comment.
I wonder if there would also be a way to count "sessions that crashed" as
well. That is,the ones that failed in a way that caused the postmaster to
restart the system. But that's information we'd have to send from the
postmaster, but I'm actually unsure if we're "allowed" to send things to
the stats collector from the postmaster. But I think it could be quite
useful information to have. Maybe we can find some way to piggyback on the
fact that we're restarting the stats collector as a result?
--
Magnus Hagander
Me: https://www.hagander.net/ <http://www.hagander.net/>
Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/>
On Tue, 2020-11-17 at 17:33 +0100, Magnus Hagander wrote:
I've taken a look as well, and here are a few short notes:
Much appreciated!
* It talks about "number of connections" but "number of aborted sessions". We should probably
be consistent about talking either about connections or sessions? In particular, connections
seems wrong in this case, because it only starts counting after authentication is complete
(since otherwise we send no stats)? (This goes for both docs and actual function names)
Yes, that is true. I have changed "connections" to "sessions" and renamed the new
column "connections" to "session_count".
I think that most people will understand a session as started after a successful
connection.
* Is there a reason we're counting active and idle in transaction (including aborted),
but not fastpath? In particular, we seem to ignore fastpath -- if we don't want to single
it out specifically, it should probably be included in active?
The only reason is that I didn't think of it. Fixed.
* pgstat_send_connstat() but pgstat_recv_connection(). Let's call both connstat or both
connection (I'd vote connstat)?
Agreed, done.
* Is this actually a fix that's independent of the new stats? It seems in general to be changing the behaviour of "force", which is more generic? - !have_function_stats) + !have_function_stats && !force)
The comment right above that reads:
/* Don't expend a clock check if nothing to do */
So it is just a quick exit if there is nothing to do.
But with that patch we have something to do if "force" (see below) is true:
Report the remaining session duration and if the session was closed normally.
Thus the additional check.
* in pgstat_send_connstat() you pass the parameter "force" in as "disconnect".
That behaviour at least requires a comment saying why, I think. My understanding is
it relies on that "force" means this is a "backend is shutting down", but that is not
actually documented anywhere. Maybe the "force" parameter should actually be renamed
to indicate this is really what it means, to avoid a future mistake in the area?
But even with that, how does that turn into disconnect?
"pgstat_report_stat(true)" is only called from "pgstat_beshutdown_hook()", so
it is currently only called when the backend is about to exit.
According the the comments the flag means that "caller wants to force stats out".
I guess that the author thought that there may arise other reasons to force sending
statistics in the future (commit 641912b4d from 2007).
However, since that has not happened, I have renamed the flag to "disconnect" and
adapted the documentation. This doesn't change the current behavior, but establishes
a new rule.
* Maybe rename pgStatSessionDisconnected to pgStatSessionNormalDisconnected?
To avoid having to go back to the setting point and look it up in a comment.
Long, descriptive names are a good thing.
I have decided to use "pgStatSessionDisconnectedNormally", since that is even longer
and seems to fit the "yes or no" category better.
I wonder if there would also be a way to count "sessions that crashed" as well.
That is,the ones that failed in a way that caused the postmaster to restart the system.
But that's information we'd have to send from the postmaster, but I'm actually unsure
if we're "allowed" to send things to the stats collector from the postmaster.
But I think it could be quite useful information to have. Maybe we can find some way
to piggyback on the fact that we're restarting the stats collector as a result?
Sure, a crash count would be useful. I don't know if it is easy for the stats collector
to tell the difference between a start after a backend crash and - say - starting from
a base backup.
Patch v6 attached.
I think that that would be material for another patch, and I don't think it should go
to "pg_stat_database", because a) it might be hard to tell to which database the crashed
backend was attached, b) it might be a background process that doesn't belong to a database
and c) if the crash were caused by - say - corruption in a shared catalog, it would be
misleading.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v6.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v6.patchDownload
From 8feed416f91a5de9011616c1545156b9c8f28943 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Fri, 20 Nov 2020 15:11:57 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended other than with a client disconnect
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to
PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 49 +++++++++++
src/backend/catalog/system_views.sql | 5 ++
src/backend/postmaster/pgstat.c | 117 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 ++
src/backend/utils/adt/pgstatfuncs.c | 68 ++++++++++++++++
src/include/catalog/pg_proc.dat | 20 +++++
src/include/pgstat.h | 27 +++++++
src/test/regress/expected/rules.out | 5 ++
8 files changed, 292 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 98e1995453..21742ce81e 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3704,6 +3704,55 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_count</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by something else than a regular client disconnection
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 2e4aa1c4b6..e941b9e0f6 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_session_count(D.oid) AS session_count,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index e76e627c6b..d5be1cae4b 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -249,6 +249,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnectedNormally = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -334,6 +337,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -369,6 +373,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -844,10 +849,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -862,17 +871,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1341,6 +1355,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnectedNormally) ? 0 : 1;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3327,6 +3383,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4879,6 +4959,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4953,6 +5037,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_session_count = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6903,6 +6992,26 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_session_count += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7c5f7c775b..514ee532db 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4494,6 +4494,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnectedNormally = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a210fc93b4..aaf2d77515 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,74 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_session_count(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_session_count);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 33dacfd340..9adba05701 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5441,6 +5441,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_session_count', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_count' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 257e515bfe..5f1641632c 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -71,6 +71,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -619,6 +620,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -651,6 +667,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -693,6 +710,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_session_count;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1347,6 +1369,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnectedNormally;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 097ff5d111..39d034ccab 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_session_count(d.oid) AS session_count,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On Fri, Nov 20, 2020 at 3:41 PM Laurenz Albe <laurenz.albe@cybertec.at>
wrote:
On Tue, 2020-11-17 at 17:33 +0100, Magnus Hagander wrote:
I've taken a look as well, and here are a few short notes:
Much appreciated!
Sorry about the delay in getting back to you on this one. FYI, while the
patch has been bumped to the next CF by now, I do intend to continue
working on it before that starts.
* It talks about "number of connections" but "number of aborted
sessions". We should probablybe consistent about talking either about connections or sessions? In
particular, connections
seems wrong in this case, because it only starts counting after
authentication is complete
(since otherwise we send no stats)? (This goes for both docs and
actual function names)
Yes, that is true. I have changed "connections" to "sessions" and renamed
the new
column "connections" to "session_count".I think that most people will understand a session as started after a
successful
connection.
Yeah, I agree, and as long as it's consistent we don't need more
explanations than that.
Further int he views, it's a bit strange to have session_count and
aborted_session, but I'm not sure what to suggest. "aborted_session_count"
seems too long. Maybe just "sessions" instead of "session_count" -- no
other counters actually have the "_count" suffix.
* Is this actually a fix that's independent of the new stats? It seems in
general to bechanging the behaviour of "force", which is more generic? - !have_function_stats) + !have_function_stats && !force)The comment right above that reads:
/* Don't expend a clock check if nothing to do */
So it is just a quick exit if there is nothing to do.But with that patch we have something to do if "force" (see below) is true:
Report the remaining session duration and if the session was closed
normally.Thus the additional check.
Ah yeah, makes sense. It becomes more clear with the rename.
* in pgstat_send_connstat() you pass the parameter "force" in as
"disconnect".That behaviour at least requires a comment saying why, I think. My
understanding is
it relies on that "force" means this is a "backend is shutting down",
but that is not
actually documented anywhere. Maybe the "force" parameter should
actually be renamed
to indicate this is really what it means, to avoid a future mistake in
the area?
But even with that, how does that turn into disconnect?
"pgstat_report_stat(true)" is only called from "pgstat_beshutdown_hook()",
so
it is currently only called when the backend is about to exit.According the the comments the flag means that "caller wants to force
stats out".
I guess that the author thought that there may arise other reasons to
force sending
statistics in the future (commit 641912b4d from 2007).However, since that has not happened, I have renamed the flag to
"disconnect" and
adapted the documentation. This doesn't change the current behavior, but
establishes
a new rule.
That makes it a lot more clear. And I agree, if nobody came up with a
reason since 2007, then we are free to repurpose it :)
* Maybe rename pgStatSessionDisconnected to
pgStatSessionNormalDisconnected?To avoid having to go back to the setting point and look it up in a
comment.
Long, descriptive names are a good thing.
I have decided to use "pgStatSessionDisconnectedNormally", since that is
even longer
and seems to fit the "yes or no" category better.
WFM.
I wonder if there would also be a way to count "sessions that crashed" as
well.That is,the ones that failed in a way that caused the postmaster to
restart the system.
But that's information we'd have to send from the postmaster, but I'm
actually unsure
if we're "allowed" to send things to the stats collector from the
postmaster.
But I think it could be quite useful information to have. Maybe we can
find some way
to piggyback on the fact that we're restarting the stats collector as a
result?
Sure, a crash count would be useful. I don't know if it is easy for the
stats collector
to tell the difference between a start after a backend crash and - say -
starting from
a base backup.Patch v6 attached.
I think that that would be material for another patch, and I don't think
it should go
to "pg_stat_database", because a) it might be hard to tell to which
database the crashed
backend was attached, b) it might be a background process that doesn't
belong to a database
and c) if the crash were caused by - say - corruption in a shared catalog,
it would be
misleading
I'm not sure it is outside the scope of this patch, because I think it
might be easier to do than I (and I think you) first thought. We don't need
to track which database crashed -- if we track all *other* ways a database
exits, then crashes are all that remains.
So in fact, we *almost* have all the data we need already. We have the
number of sessions started. We have the number of sessions "aborted". if we
also had the number of sessions that were closed normally, then whatever is
"left" would be the number of sessions crashed. And we do already, in your
patch, send the message in the case of both aborted and non-aborted
sessions. So we just need to keep track of both in the statsfile (which we
don't now), and we'd more or less have it, wouldn't we?
However, some thinking around that also leads me to another question which
is very much in scope for this patch regardless, which is what about
shutdown and admin termination. Right now, when you do a "pg_ctl stop" on
the database, all sessions count as aborted. Same thing for a
pg_terminate_backend(). I wonder if this is also a case that would be
useful to track as a separate thing? One could argue that the docs in your
patch say aborted means "terminated by something else than a regular client
disconnection". But that's true for a "shutdown", but not for a crash, so
whichever way we go with crashes it's slightly incorrect.
But thinking from a usability perspective, wouldn't what we want more be
something like <closed by correct disconnect>, <closed by abnormal
disconnect>, <closed by admin>, <crash>?
What do you think of adapting it to that?
Basically, that would change pgStatSessionDisconnectedNormally into instead
being an enum of reasons, which could be normal disconnect, abnormal
disconnect and admin. And we'd track all those three as separate numbers in
the stats file, meaning we could then calculate the crash by subtracting
all three from the total number of sessions?
(Let me know if you think the idea could work and would prefer it if I
worked up a complete suggestion based on it rather than just spitting ideas)
--
Magnus Hagander
Me: https://www.hagander.net/ <http://www.hagander.net/>
Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/>
On Tue, 2020-12-01 at 17:32 +0100, Magnus Hagander wrote:
I have changed "connections" to "sessions" and renamed the new
column "connections" to "session_count".I think that most people will understand a session as started after a successful
connection.Yeah, I agree, and as long as it's consistent we don't need more explanations than that.
Further int he views, it's a bit strange to have session_count and aborted_session, but I'm not
sure what to suggest. "aborted_session_count" seems too long. Maybe just "sessions" instead
of "session_count" -- no other counters actually have the "_count" suffix.
"sessions" is fine, I think; I changed the name.
I wonder if there would also be a way to count "sessions that crashed" as well.
That is,the ones that failed in a way that caused the postmaster to restart the system.Sure, a crash count would be useful. I don't know if it is easy for the stats collector
to tell the difference between a start after a backend crash and - say - starting from
a base backup.I think that that would be material for another patch, and I don't think it should go
to "pg_stat_database", because a) it might be hard to tell to which database the crashed
backend was attached, b) it might be a background process that doesn't belong to a database
and c) if the crash were caused by - say - corruption in a shared catalog, it would be
misleadingI'm not sure it is outside the scope of this patch, because I think it might be easier to
do than I (and I think you) first thought. We don't need to track which database crashed --
if we track all *other* ways a database exits, then crashes are all that remains.So in fact, we *almost* have all the data we need already. We have the number of sessions
started. We have the number of sessions "aborted". if we also had the number of sessions
that were closed normally, then whatever is "left" would be the number of sessions crashed.
And we do already, in your patch, send the message in the case of both aborted and
non-aborted sessions. So we just need to keep track of both in the statsfile
(which we don't now), and we'd more or less have it, wouldn't we?
There is one problem with that: the statistics collector is not guaranteed to get all
messages, right? If a disconnection statistics UDP datagram doesn't reach the statistics
collector, that connection
would end up being reported as crashed.
That would alarm people unnecessarily and make the crash statistics misleading.
However, some thinking around that also leads me to another question which is very much
in scope for this patch regardless, which is what about shutdown and admin termination.
Right now, when you do a "pg_ctl stop" on the database, all sessions count as aborted.
Same thing for a pg_terminate_backend(). I wonder if this is also a case that would be
useful to track as a separate thing? One could argue that the docs in your patch say
aborted means "terminated by something else than a regular client disconnection".
But that's true for a "shutdown", but not for a crash, so whichever way we go with crashes
it's slightly incorrect.
But thinking from a usability perspective, wouldn't what we want more be something
like <closed by correct disconnect>, <closed by abnormal disconnect>, <closed by admin>,
<crash>?What do you think of adapting it to that?
Basically, that would change pgStatSessionDisconnectedNormally into instead being an
enum of reasons, which could be normal disconnect, abnormal disconnect and admin.
And we'd track all those three as separate numbers in the stats file, meaning we could
then calculate the crash by subtracting all three from the total number of sessions?
I think at least "closed by admin" might be interesting; I'll have a look.
I don't think we have to specifically count "closed by normal disconnect", because
that should be the rule and could be more or less deduced from the other numbers
(with the uncertainty mentioned above).
(Let me know if you think the idea could work and would prefer it if I worked up a
complete suggestion based on it rather than just spitting ideas)
Thanks for the offer, and I'll get back to it if I get stuck.
But I'm ready to do the grunt work, so that you can spend your precious
committer cycles elsewhere :^)
I'll have a go at "closed by admin", meanwhile here is patch v7 with the renaming
"session_count -> sessions".
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v7.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v7.patchDownload
From 8feed416f91a5de9011616c1545156b9c8f28943 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Fri, 20 Nov 2020 15:11:57 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended other than with a client disconnect
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to
PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 49 +++++++++++
src/backend/catalog/system_views.sql | 5 ++
src/backend/postmaster/pgstat.c | 117 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 5 ++
src/backend/utils/adt/pgstatfuncs.c | 68 ++++++++++++++++
src/include/catalog/pg_proc.dat | 20 +++++
src/include/pgstat.h | 27 +++++++
src/test/regress/expected/rules.out | 5 ++
8 files changed, 292 insertions(+), 4 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 98e1995453..21742ce81e 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3704,6 +3704,55 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>aborted_sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by something else than a regular client disconnection
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 2e4aa1c4b6..e941b9e0f6 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,11 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_aborted_sessions(D.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index e76e627c6b..d5be1cae4b 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -249,6 +249,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+bool pgStatSessionDisconnectedNormally = false;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -334,6 +337,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -369,6 +373,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -844,10 +849,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -862,17 +871,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1341,6 +1355,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_aborted = (!disconnect || pgStatSessionDisconnectedNormally) ? 0 : 1;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3327,6 +3383,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4879,6 +4959,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4953,6 +5037,11 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_aborted = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6903,6 +6992,26 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ dbentry->n_aborted += msg->m_aborted;
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7c5f7c775b..514ee532db 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4494,6 +4494,11 @@ PostgresMain(int argc, char *argv[],
* perform normal shutdown.
*/
case 'X':
+ /* report as normal client disconnection */
+ pgStatSessionDisconnectedNormally = true;
+
+ /* FALLTHROUGH */
+
case EOF:
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a210fc93b4..aaf2d77515 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,74 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_aborted_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_aborted);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 33dacfd340..9adba05701 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5441,6 +5441,26 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579', descr => 'statistics: number of aborted sessions',
+ proname => 'pg_stat_get_db_aborted_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_aborted_sessions' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 257e515bfe..5f1641632c 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -71,6 +71,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -619,6 +620,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ PgStat_Counter m_aborted;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -651,6 +667,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -693,6 +710,11 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_aborted;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1347,6 +1369,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated in PostgresMain upon disconnect.
+ */
+extern bool pgStatSessionDisconnectedNormally;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 097ff5d111..39d034ccab 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,11 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_aborted_sessions(d.oid) AS aborted_sessions,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On Thu, 2020-12-03 at 13:22 +0100, Laurenz Albe wrote:
Basically, that would change pgStatSessionDisconnectedNormally into instead being an
enum of reasons, which could be normal disconnect, abnormal disconnect and admin.
And we'd track all those three as separate numbers in the stats file, meaning we could
then calculate the crash by subtracting all three from the total number of sessions?I think at least "closed by admin" might be interesting; I'll have a look.
I don't think we have to specifically count "closed by normal disconnect", because
that should be the rule and could be more or less deduced from the other numbers
(with the uncertainty mentioned above).(Let me know if you think the idea could work and would prefer it if I worked up a
complete suggestion based on it rather than just spitting ideas)Thanks for the offer, and I'll get back to it if I get stuck.
Ok, I could use a pointer.
I am considering the cases
1) client just went away (currently "aborted")
2) death by FATAL error
3) killed by the administrator (or shutdown)
What is a good place in the code to tell 2) or 3)
so that I can set the state accordingly?
Yours,
Laurenz Albe
On Fri, 2020-12-04 at 16:55 +0100, I wrote:
Basically, that would change pgStatSessionDisconnectedNormally into instead being an
enum of reasons, which could be normal disconnect, abnormal disconnect and admin.
And we'd track all those three as separate numbers in the stats file, meaning we could
then calculate the crash by subtracting all three from the total number of sessions?I think at least "closed by admin" might be interesting; I'll have a look.
I don't think we have to specifically count "closed by normal disconnect", because
that should be the rule and could be more or less deduced from the other numbers
(with the uncertainty mentioned above).I am considering the cases
1) client just went away (currently "aborted")
2) death by FATAL error
3) killed by the administrator (or shutdown)
I think I figured it out. Here is a patch along these lines.
I named the three counters "sessions_client_eof", "sessions_fatal" and
"sessions_killed", but I am not wedded to these bike shed colors.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v8.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v8.patchDownload
From 96c4aaa71ceff3ef83004780ac6dc3f5059302bb Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Sat, 5 Dec 2020 12:58:46 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended by loss of network connection,
fatal errors and operator intervention
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to
PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 69 ++++++++++++++
src/backend/catalog/system_views.sql | 7 ++
src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 20 ++--
src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++
src/backend/utils/error/elog.c | 9 ++
src/include/catalog/pg_proc.dat | 28 ++++++
src/include/pgstat.h | 37 ++++++++
src/test/regress/expected/rules.out | 7 ++
9 files changed, 395 insertions(+), 10 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 52a69a5366..9bc1acf841 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3731,6 +3731,75 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_client_eof</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ because connection to the client was lost
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_fatal</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by fatal errors
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_killed</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by operator intervention
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index b140c210bc..1b022a114f 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_sessions_client_eof(D.oid) AS sessions_client_eof,
+ pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 9bad14981b..6bd3fe0bf2 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+sessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -853,10 +858,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -871,17 +880,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1350,6 +1364,49 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ if (disconnect)
+ msg.m_disconnect = pgStatSessionEndCause;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3343,6 +3400,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4914,6 +4995,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4988,6 +5073,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_sessions_client_eof = 0;
+ dbentry->n_sessions_fatal = 0;
+ dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6941,6 +7033,40 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ switch (msg->m_disconnect)
+ {
+ case DISCONNECT_NORMAL:
+ /* we don't collect these */
+ break;
+ case DISCONNECT_CLIENT_EOF:
+ ++(dbentry->n_sessions_client_eof);
+ break;
+ case DISCONNECT_FATAL:
+ ++(dbentry->n_sessions_fatal);
+ break;
+ case DISCONNECT_KILLED:
+ ++(dbentry->n_sessions_killed);
+ break;
+ }
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 3679799e50..385041f157 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2834,6 +2834,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_KILLED;
+
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@@ -4496,13 +4499,18 @@ PostgresMain(int argc, char *argv[],
send_ready_for_query = true;
break;
- /*
- * 'X' means that the frontend is closing down the socket. EOF
- * means unexpected loss of frontend connection. Either way,
- * perform normal shutdown.
- */
- case 'X':
+ /*
+ * 'X' means that the frontend is closing down the socket. EOF
+ * means unexpected loss of frontend connection. Either way,
+ * perform normal shutdown.
+ */
case EOF:
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
+
+ /* FALLTHROUGH */
+
+ case 'X':
/*
* Reset whereToSendOutput to prevent ereport from attempting
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 6afe1b6f56..114293c3c9 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_client_eof(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_client_eof);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 3558e660c7..7830a408a7 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@@ -651,6 +652,14 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
+ /*
+ * Let the statistics collector know.
+ * Only mark the session as terminated by fatal error if there is
+ * no other known cause.
+ */
+ if (pgStatSessionEndCause == DISCONNECT_NORMAL)
+ pgStatSessionEndCause = DISCONNECT_FATAL;
+
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fc2202b843..da8062fd04 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5438,6 +5438,34 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579', descr => 'statistics: number of sessions disconnected by the client closing the network connection',
+ proname => 'pg_stat_get_db_sessions_client_eof', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_client_eof' },
+{ oid => '9580', descr => 'statistics: number of sessions disconnected by fatal errors',
+ proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_fatal' },
+{ oid => '9581', descr => 'statistics: number of sessions killed by administrative action',
+ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5954068dec..d1d6692079 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -41,6 +41,14 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+/* Values to track the cause of session termination */
+typedef enum sessionEndType {
+ DISCONNECT_NORMAL,
+ DISCONNECT_CLIENT_EOF,
+ DISCONNECT_FATAL,
+ DISCONNECT_KILLED
+} sessionEndType;
+
/* ----------
* The types of backend -> collector messages
* ----------
@@ -71,6 +79,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -622,6 +631,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ sessionEndType m_disconnect;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -654,6 +678,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -696,6 +721,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_sessions_client_eof;
+ PgStat_Counter n_sessions_fatal;
+ PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1353,6 +1385,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated by the traffic cop and in errfinish()
+ */
+extern sessionEndType pgStatSessionEndCause;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 6293ab57bc..6fd7e194ba 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_sessions_client_eof(d.oid) AS sessions_client_eof,
+ pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On Sat, Dec 5, 2020 at 1:04 PM Laurenz Albe <laurenz.albe@cybertec.at>
wrote:
On Fri, 2020-12-04 at 16:55 +0100, I wrote:
Basically, that would change pgStatSessionDisconnectedNormally into
instead being an
enum of reasons, which could be normal disconnect, abnormal
disconnect and admin.
And we'd track all those three as separate numbers in the stats
file, meaning we could
then calculate the crash by subtracting all three from the total
number of sessions?
I think at least "closed by admin" might be interesting; I'll have a
look.
I don't think we have to specifically count "closed by normal
disconnect", because
that should be the rule and could be more or less deduced from the
other numbers
(with the uncertainty mentioned above).
I am considering the cases
1) client just went away (currently "aborted")
2) death by FATAL error
3) killed by the administrator (or shutdown)I think I figured it out. Here is a patch along these lines.
I named the three counters "sessions_client_eof", "sessions_fatal" and
"sessions_killed", but I am not wedded to these bike shed colors.
Maybe we should, in honor of the bikeshed, we should call them
sessions_blue, sessions_green etc :)
In true bikeshedding mode, I'm not entirely happy with sessions_client_eof,
but I'm also not sure I have a better suggestion. Maybe just
"sessions_lost" or "sessions_connlost", which is basically the terminology
that the documentation uses? Maybe it's just me, but I don't really like
the eof terminology here.
What do you think about that? Or does somebody else have an opinion here?
Aside from that bikeshedding, I think this version looks very good!
In today's dept of small things I noticed:
+ if (disconnect)
+ msg.m_disconnect = pgStatSessionEndCause;
in the non-disconnect state, that variable is left uninitialized, isn't
it? It does end up getting ignored later, but to be more future proof the
enum should probably have a value specifically for "not disconnected yet"?
+ case DISCONNECT_CLIENT_EOF:
+ ++(dbentry->n_sessions_client_eof);
+ break;
The normal syntax we'd use for that would be
dbentry->n_sessions_client_eof++;
+ typedef enum sessionEndType {
To be consistent with the other enums in the same place, seems this should
be SessionEndType.
--
Magnus Hagander
Me: https://www.hagander.net/ <http://www.hagander.net/>
Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/>
On Sun, 2020-12-13 at 17:49 +0100, Magnus Hagander wrote:
I am considering the cases
1) client just went away (currently "aborted")
2) death by FATAL error
3) killed by the administrator (or shutdown)I named the three counters "sessions_client_eof", "sessions_fatal" and
"sessions_killed", but I am not wedded to these bike shed colors.In true bikeshedding mode, I'm not entirely happy with sessions_client_eof,
but I'm also not sure I have a better suggestion. Maybe just "sessions_lost"
or "sessions_connlost", which is basically the terminology that the documentation uses?
Maybe it's just me, but I don't really like the eof terminology here.What do you think about that? Or does somebody else have an opinion here?
I slept over it, and came up with "sessions_abandoned".
In today's dept of small things I noticed:
+ if (disconnect) + msg.m_disconnect = pgStatSessionEndCause;in the non-disconnect state, that variable is left uninitialized, isn't it?
It does end up getting ignored later, but to be more future proof the enum should probably
have a value specifically for "not disconnected yet"?
Yes. I named it DISCONNECT_NOT_YET.
+ case DISCONNECT_CLIENT_EOF: + ++(dbentry->n_sessions_client_eof); + break;The normal syntax we'd use for that would be
dbentry->n_sessions_client_eof++;
Ok, changed.
+ typedef enum sessionEndType {
To be consistent with the other enums in the same place, seems this should be SessionEndType.
True. I have renamed the type.
Attached is patch version 9.
Added goodie: I ran pgindent on it.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v9.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v9.patchDownload
From b40e34141c80ff59c0005f430bd8c273918eb7bb Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Tue, 15 Dec 2020 13:46:44 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended by loss of network connection,
fatal errors and operator intervention
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 69 ++++++++++++++
src/backend/catalog/system_views.sql | 7 ++
src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 10 +-
src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++
src/backend/utils/error/elog.c | 8 ++
src/include/catalog/pg_proc.dat | 28 ++++++
src/include/pgstat.h | 39 ++++++++
src/test/regress/expected/rules.out | 7 ++
9 files changed, 391 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 52a69a5366..6206fefec0 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3731,6 +3731,75 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_abandoned</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ because connection to the client was lost
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_fatal</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by fatal errors
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_killed</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by operator intervention
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index b140c210bc..3a2569b135 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7c75a25d21..a2337b78f1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn * msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -873,17 +882,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1352,6 +1366,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3345,6 +3401,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4916,6 +4996,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4990,6 +5074,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_sessions_abandoned = 0;
+ dbentry->n_sessions_fatal = 0;
+ dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn * msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ switch (msg->m_disconnect)
+ {
+ case DISCONNECT_NOT_YET:
+ case DISCONNECT_NORMAL:
+ /* we don't collect these */
+ break;
+ case DISCONNECT_CLIENT_EOF:
+ ++(dbentry->n_sessions_abandoned);
+ break;
+ case DISCONNECT_FATAL:
+ ++(dbentry->n_sessions_fatal);
+ break;
+ case DISCONNECT_KILLED:
+ ++(dbentry->n_sessions_killed);
+ break;
+ }
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 3679799e50..1600e0d16d 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2834,6 +2834,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_KILLED;
+
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@@ -4501,9 +4504,14 @@ PostgresMain(int argc, char *argv[],
* means unexpected loss of frontend connection. Either way,
* perform normal shutdown.
*/
- case 'X':
case EOF:
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
+
+ /* FALLTHROUGH */
+
+ case 'X':
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 6afe1b6f56..092cf483eb 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_abandoned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 3558e660c7..6f9acac423 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@@ -651,6 +652,13 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
+ /*
+ * Let the statistics collector know. Only mark the session as
+ * terminated by fatal error if there is no other known cause.
+ */
+ if (pgStatSessionEndCause == DISCONNECT_NORMAL)
+ pgStatSessionEndCause = DISCONNECT_FATAL;
+
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e6c7b070f6..1460786d50 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5438,6 +5438,34 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579', descr => 'statistics: number of sessions disconnected by the client closing the network connection',
+ proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_abandoned' },
+{ oid => '9580', descr => 'statistics: number of sessions disconnected by fatal errors',
+ proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_fatal' },
+{ oid => '9581', descr => 'statistics: number of sessions killed by administrative action',
+ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5954068dec..bf37fa3547 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+/* Values to track the cause of session termination */
+typedef enum SessionEndType
+{
+ DISCONNECT_NOT_YET, /* still active */
+ DISCONNECT_NORMAL,
+ DISCONNECT_CLIENT_EOF,
+ DISCONNECT_FATAL,
+ DISCONNECT_KILLED
+} SessionEndType;
+
/* ----------
* The types of backend -> collector messages
* ----------
@@ -71,6 +81,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ SessionEndType m_disconnect;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -654,6 +680,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_sessions_abandoned;
+ PgStat_Counter n_sessions_fatal;
+ PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1353,6 +1387,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated by the traffic cop and in errfinish()
+ */
+extern SessionEndType pgStatSessionEndCause;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 6293ab57bc..a2315be720 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On Tue, 2020-12-15 at 13:53 +0100, Laurenz Albe wrote:
Attached is patch version 9.
Aah, I forgot the ++.
Version 10 attached.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v10.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v10.patchDownload
From b40e34141c80ff59c0005f430bd8c273918eb7bb Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Tue, 15 Dec 2020 13:46:44 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended by loss of network connection,
fatal errors and operator intervention
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 69 ++++++++++++++
src/backend/catalog/system_views.sql | 7 ++
src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 10 +-
src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++
src/backend/utils/error/elog.c | 8 ++
src/include/catalog/pg_proc.dat | 28 ++++++
src/include/pgstat.h | 39 ++++++++
src/test/regress/expected/rules.out | 7 ++
9 files changed, 391 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 52a69a5366..6206fefec0 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3731,6 +3731,75 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_abandoned</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ because connection to the client was lost
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_fatal</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by fatal errors
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_killed</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by operator intervention
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index b140c210bc..3a2569b135 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7c75a25d21..a2337b78f1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn * msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -873,17 +882,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1352,6 +1366,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3345,6 +3401,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4916,6 +4996,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4990,6 +5074,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_sessions_abandoned = 0;
+ dbentry->n_sessions_fatal = 0;
+ dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn * msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ switch (msg->m_disconnect)
+ {
+ case DISCONNECT_NOT_YET:
+ case DISCONNECT_NORMAL:
+ /* we don't collect these */
+ break;
+ case DISCONNECT_CLIENT_EOF:
+ dbentry->n_sessions_abandoned++;
+ break;
+ case DISCONNECT_FATAL:
+ dbentry->n_sessions_fatal++;
+ break;
+ case DISCONNECT_KILLED:
+ dbentry->n_sessions_killed++;
+ break;
+ }
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 3679799e50..1600e0d16d 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2834,6 +2834,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_KILLED;
+
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@@ -4501,9 +4504,14 @@ PostgresMain(int argc, char *argv[],
* means unexpected loss of frontend connection. Either way,
* perform normal shutdown.
*/
- case 'X':
case EOF:
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
+
+ /* FALLTHROUGH */
+
+ case 'X':
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 6afe1b6f56..092cf483eb 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1629,6 +1629,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_abandoned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 3558e660c7..6f9acac423 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@@ -651,6 +652,13 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
+ /*
+ * Let the statistics collector know. Only mark the session as
+ * terminated by fatal error if there is no other known cause.
+ */
+ if (pgStatSessionEndCause == DISCONNECT_NORMAL)
+ pgStatSessionEndCause = DISCONNECT_FATAL;
+
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e6c7b070f6..1460786d50 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5438,6 +5438,34 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in seconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in seconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577', descr => 'statistics: session idle in transaction time, in seconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579', descr => 'statistics: number of sessions disconnected by the client closing the network connection',
+ proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_abandoned' },
+{ oid => '9580', descr => 'statistics: number of sessions disconnected by fatal errors',
+ proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_fatal' },
+{ oid => '9581', descr => 'statistics: number of sessions killed by administrative action',
+ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5954068dec..bf37fa3547 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+/* Values to track the cause of session termination */
+typedef enum SessionEndType
+{
+ DISCONNECT_NOT_YET, /* still active */
+ DISCONNECT_NORMAL,
+ DISCONNECT_CLIENT_EOF,
+ DISCONNECT_FATAL,
+ DISCONNECT_KILLED
+} SessionEndType;
+
/* ----------
* The types of backend -> collector messages
* ----------
@@ -71,6 +81,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by pgstat_connection to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ SessionEndType m_disconnect;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -654,6 +680,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_sessions_abandoned;
+ PgStat_Counter n_sessions_fatal;
+ PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1353,6 +1387,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated by the traffic cop and in errfinish()
+ */
+extern SessionEndType pgStatSessionEndCause;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 6293ab57bc..a2315be720 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
Hi,
As a user, I want this feature to know whether
clients' session activities are as expected.
I have some comments about the patch.
1. pg_proc.dat
The unit of "session time" and so on says "in seconds".
But, is "in milliseconds" right?
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function and
"idle in transaction" includes "idle in transaction(aborted)" time.
Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.
3. pgstat.h
The comment of PgStat_MsgConn says "Sent by pgstat_connection".
I thought "pgstat_connection" is a function, but it doesn't exist.
Is "Sent by the backend" right?
Although this is a trivial thing, the following row has too many tabs.
Other structs have only one space.
// }<tab><tab><tab>Pgstat_MsgConn;
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
On Fri, 2020-12-25 at 20:28 +0900, Masahiro Ikeda wrote:
As a user, I want this feature to know whether
clients' session activities are as expected.I have some comments about the patch.
1. pg_proc.dat
The unit of "session time" and so on says "in seconds".
But, is "in milliseconds" right?2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function and
"idle in transaction" includes "idle in transaction(aborted)" time.Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.3. pgstat.h
The comment of PgStat_MsgConn says "Sent by pgstat_connection".
I thought "pgstat_connection" is a function, but it doesn't exist.Is "Sent by the backend" right?
Although this is a trivial thing, the following row has too many tabs.
Other structs have only one space.
// }<tab><tab><tab>Pgstat_MsgConn;
Thanks for the feedback.
I am currently on vacations and will take a look after January 7.
Yours,
Laurenz Albe
On Fri, 2020-12-25 at 20:28 +0900, Masahiro Ikeda wrote:
As a user, I want this feature to know whether
clients' session activities are as expected.I have some comments about the patch.
Thanks you for the thorough review!
1. pg_proc.dat
The unit of "session time" and so on says "in seconds".
But, is "in milliseconds" right?
You are right. Fixed.
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function and
"idle in transaction" includes "idle in transaction(aborted)" time.Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.
Good idea; I have expanded the documentation like that.
3. pgstat.h
The comment of PgStat_MsgConn says "Sent by pgstat_connection".
I thought "pgstat_connection" is a function, but it doesn't exist.Is "Sent by the backend" right?
The function was renamed and is now called "pgstat_send_connstats".
But you are right, I might as well match the surrounding code and
write "Sent by the backend".
Although this is a trivial thing, the following row has too many tabs.
Other structs have only one space.
// }<tab><tab><tab>Pgstat_MsgConn;
Yes, I messed that up during the pgindent run. Fixed.
Patch version 11 is attached.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v11.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v11.patchDownload
From 324847353f5d9e5b2899dd93d43fb345df1dcdb8 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Thu, 7 Jan 2021 16:33:45 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended by loss of network connection,
fatal errors and operator intervention
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 77 +++++++++++++++
src/backend/catalog/system_views.sql | 7 ++
src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 11 ++-
src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++
src/backend/utils/error/elog.c | 8 ++
src/include/catalog/pg_proc.dat | 32 +++++++
src/include/pgstat.h | 39 ++++++++
src/test/regress/expected/rules.out | 7 ++
9 files changed, 404 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 43fe8ae383..59622173da 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3737,6 +3737,83 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ (this corresponds to the states <literal>active</literal> and
+ <literal>fastpath function call</literal> in
+ <link linkend="monitoring-pg-stat-activity-view">
+ <structname>pg_stat_activity</structname></link>)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ (this corresponds to the states <literal>idle in transaction</literal> and
+ <literal>idle in transaction (aborted)</literal> in
+ <link linkend="monitoring-pg-stat-activity-view">
+ <structname>pg_stat_activity</structname></link>)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_abandoned</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ because connection to the client was lost
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_fatal</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by fatal errors
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_killed</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by operator intervention
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5d89e77dbe..fa58afd9d7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 3f24a33ef1..cdaddbb66c 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -873,17 +882,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1351,6 +1365,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3348,6 +3404,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4919,6 +4999,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4993,6 +5077,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_sessions_abandoned = 0;
+ dbentry->n_sessions_fatal = 0;
+ dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ switch (msg->m_disconnect)
+ {
+ case DISCONNECT_NOT_YET:
+ case DISCONNECT_NORMAL:
+ /* we don't collect these */
+ break;
+ case DISCONNECT_CLIENT_EOF:
+ dbentry->n_sessions_abandoned++;
+ break;
+ case DISCONNECT_FATAL:
+ dbentry->n_sessions_fatal++;
+ break;
+ case DISCONNECT_KILLED:
+ dbentry->n_sessions_killed++;
+ break;
+ }
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2b53ebf97d..882ed96087 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2865,6 +2865,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_KILLED;
+
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@@ -4578,9 +4581,15 @@ PostgresMain(int argc, char *argv[],
* means unexpected loss of frontend connection. Either way,
* perform normal shutdown.
*/
- case 'X':
case EOF:
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
+
+ /* FALLTHROUGH */
+
+ case 'X':
+
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 5c12a165a1..2bb16eabf0 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1631,6 +1631,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_abandoned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 7790f6ab25..80c2672461 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@@ -656,6 +657,13 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
+ /*
+ * Let the statistics collector know. Only mark the session as
+ * terminated by fatal error if there is no other known cause.
+ */
+ if (pgStatSessionEndCause == DISCONNECT_NORMAL)
+ pgStatSessionEndCause = DISCONNECT_FATAL;
+
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d7b55f57ea..f5fcc5117d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5438,6 +5438,38 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in milliseconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in milliseconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577',
+ descr => 'statistics: session idle in transaction time, in milliseconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's', proparallel => 'r',
+ prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579',
+ descr => 'statistics: number of sessions disconnected by the client closing the network connection',
+ proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_abandoned' },
+{ oid => '9580',
+ descr => 'statistics: number of sessions disconnected by fatal errors',
+ proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_fatal' },
+{ oid => '9581',
+ descr => 'statistics: number of sessions killed by administrative action',
+ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c38b689710..d65fb9deca 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+/* Values to track the cause of session termination */
+typedef enum SessionEndType
+{
+ DISCONNECT_NOT_YET, /* still active */
+ DISCONNECT_NORMAL,
+ DISCONNECT_CLIENT_EOF,
+ DISCONNECT_FATAL,
+ DISCONNECT_KILLED
+} SessionEndType;
+
/* ----------
* The types of backend -> collector messages
* ----------
@@ -71,6 +81,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by the backend to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ SessionEndType m_disconnect;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -654,6 +680,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_sessions_abandoned;
+ PgStat_Counter n_sessions_fatal;
+ PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1354,6 +1388,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated by the traffic cop and in errfinish()
+ */
+extern SessionEndType pgStatSessionEndCause;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a687e99d1e..6173473de9 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On 2021-01-08 00:47, Laurenz Albe wrote:
On Fri, 2020-12-25 at 20:28 +0900, Masahiro Ikeda wrote:
As a user, I want this feature to know whether
clients' session activities are as expected.I have some comments about the patch.
Thanks you for the thorough review!
Thanks for updating the patch!
1. pg_proc.dat
The unit of "session time" and so on says "in seconds".
But, is "in milliseconds" right?You are right. Fixed.
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function
and
"idle in transaction" includes "idle in transaction(aborted)" time.Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.Good idea; I have expanded the documentation like that.
BTW, is there any reason to merge the above statistics?
IIUC, to separate statistics' cons is that two columns increase, and
there is no performance penalty. So, I wonder that there is a way to
separate them
corresponding to the state column of pg_stat_activity.
3. pgstat.h
The comment of PgStat_MsgConn says "Sent by pgstat_connection".
I thought "pgstat_connection" is a function, but it doesn't exist.Is "Sent by the backend" right?
The function was renamed and is now called "pgstat_send_connstats".
But you are right, I might as well match the surrounding code and
write "Sent by the backend".Although this is a trivial thing, the following row has too many tabs.
Other structs have only one space.
// }<tab><tab><tab>Pgstat_MsgConn;
Yes, I messed that up during the pgindent run. Fixed.
Patch version 11 is attached.
There are some following codes in pgstatfuncs.c.
int64 result = 0.0;
But, I think the following is better.
int64 result = 0;
Although now pg_stat_get_db_session_time is initialize "result" to zero
when it is declared,
another pg_stat_XXX function didn't initialize. Is it better to change
it?
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
On Fri, 2021-01-08 at 12:00 +0900, Masahiro Ikeda wrote:
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function
and
"idle in transaction" includes "idle in transaction(aborted)" time.
Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.Good idea; I have expanded the documentation like that.
BTW, is there any reason to merge the above statistics?
IIUC, to separate statistics' cons is that two columns increase, and
there is no performance penalty. So, I wonder that there is a way to
separate them
corresponding to the state column of pg_stat_activity.
Sure, that could be done.
I decided to do it like this because I thought that few people would
be interested in "time spend doing fast-path function calls"; my guess
was that the more interesting value is "time where the database was
busy calculating results".
I tried to keep the balance between providing reasonable detail
while not creating more additional columns to "pg_stat_database"
than necessary.
This is of course a matter of taste, and it is good to hear different
opinions. If more people share your opinion, I'll change the code.
There are some following codes in pgstatfuncs.c.
int64 result = 0.0;But, I think the following is better.
int64 result = 0;
You are right. That was a silly copy-and-paste error. Fixed.
Although now pg_stat_get_db_session_time is initialize "result" to zero
when it is declared,
another pg_stat_XXX function didn't initialize. Is it better to change
it?
I looked at other similar functions, and the ones I saw returned
NULL if there were no data. In that case, it makes sense to write
char *result;
if ((result = get_stats_data()) == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(cstring_to_text(result));
But I want to return 0 for the session time if there are no data yet,
so I think initializing the result to 0 in the declaration makes sense.
There are some functions that do it like this:
int32 result;
result = 0;
for (...)
{
if (...)
result++;
}
PG_RETURN_INT32(result);
Again, it is a matter of taste, and I didn't detect a clear pattern
in the existing code that I feel I should follow in this question.
Version 12 of the patch is attached.
Yours,
Laurenz Albe
Attachments:
0001-Add-session-statistics-to-pg_stat_database.v12.patchtext/x-patch; charset=UTF-8; name=0001-Add-session-statistics-to-pg_stat_database.v12.patchDownload
From 324847353f5d9e5b2899dd93d43fb345df1dcdb8 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Thu, 7 Jan 2021 16:33:45 +0100
Subject: [PATCH] Add session statistics to pg_stat_database
If "track_counts" is active, track the following per database:
- total number of connections
- number of sessions that ended by loss of network connection,
fatal errors and operator intervention
- total time spent in database sessions
- total time spent executing queries
- total idle in transaction time
This is useful to check if connection pooling is working.
It also helps to estimate the size of the connection pool
required to keep the database busy, which depends on the
percentage of the transaction time that is spent idling.
Discussion: https://postgr.es/m/b07e1f9953701b90c66ed368656f2aef40cac4fb.camel@cybertec.at
Reviewed-By: Soumyadeep Chakraborty, Justin Pryzby, Masahiro Ikeda, Magnus Hagander
(This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID)
---
doc/src/sgml/monitoring.sgml | 77 +++++++++++++++
src/backend/catalog/system_views.sql | 7 ++
src/backend/postmaster/pgstat.c | 134 ++++++++++++++++++++++++++-
src/backend/tcop/postgres.c | 11 ++-
src/backend/utils/adt/pgstatfuncs.c | 94 +++++++++++++++++++
src/backend/utils/error/elog.c | 8 ++
src/include/catalog/pg_proc.dat | 32 +++++++
src/include/pgstat.h | 39 ++++++++
src/test/regress/expected/rules.out | 7 ++
9 files changed, 404 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 43fe8ae383..59622173da 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3737,6 +3737,83 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>session_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent by database sessions in this database, in milliseconds
+ (note that statistics are only updated when the state of a session
+ changes, so if sessions have been idle for a long time, this idle time
+ won't be included)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>active_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent executing SQL statements in this database, in milliseconds
+ (this corresponds to the states <literal>active</literal> and
+ <literal>fastpath function call</literal> in
+ <link linkend="monitoring-pg-stat-activity-view">
+ <structname>pg_stat_activity</structname></link>)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>idle_in_transaction_time</structfield> <type>double precision</type>
+ </para>
+ <para>
+ Time spent idling while in a transaction in this database, in milliseconds
+ (this corresponds to the states <literal>idle in transaction</literal> and
+ <literal>idle in transaction (aborted)</literal> in
+ <link linkend="monitoring-pg-stat-activity-view">
+ <structname>pg_stat_activity</structname></link>)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Total number of sessions established to this database
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_abandoned</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ because connection to the client was lost
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_fatal</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by fatal errors
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>sessions_killed</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of database sessions to this database that were terminated
+ by operator intervention
+ </para></entry>
+ </row>
+
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5d89e77dbe..fa58afd9d7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -924,6 +924,13 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_checksum_last_failure(D.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(D.oid) AS session_time,
+ pg_stat_get_db_active_time(D.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(D.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(D.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(D.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(D.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(D.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
FROM (
SELECT 0 AS oid, NULL::name AS datname
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 3f24a33ef1..cdaddbb66c 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -258,6 +258,9 @@ static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
PgStat_Counter pgStatBlockReadTime = 0;
PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgStatActiveTime = 0;
+static PgStat_Counter pgStatTransactionIdleTime = 0;
+SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
@@ -343,6 +346,7 @@ static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static void pgstat_send_funcstats(void);
static void pgstat_send_slru(void);
static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
+static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
@@ -378,6 +382,7 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
+static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
@@ -855,10 +860,14 @@ allow_immediate_pgstat_restart(void)
* per-table and function usage statistics to the collector. Note that this
* is called only when not within a transaction, so it is fair to use
* transaction stop time as an approximation of current time.
+ *
+ * "disconnect" is "true" only for the last call before the backend
+ * exits. This makes sure that no data are lost and that interrupted
+ * sessions are reported correctly.
* ----------
*/
void
-pgstat_report_stat(bool force)
+pgstat_report_stat(bool disconnect)
{
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
@@ -873,17 +882,22 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
- !have_function_stats)
+ !have_function_stats && !disconnect)
return;
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
- * msec since we last sent one, or the caller wants to force stats out.
+ * msec since we last sent one, or the backend is about to exit.
*/
now = GetCurrentTransactionStopTimestamp();
- if (!force &&
+ if (!disconnect &&
!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
return;
+
+ /* for backends, send connection statistics */
+ if (MyBackendType == B_BACKEND)
+ pgstat_send_connstats(disconnect, last_report);
+
last_report = now;
/*
@@ -1351,6 +1365,48 @@ pgstat_drop_relation(Oid relid)
#endif /* NOT_USED */
+/* ----------
+ * pgstat_send_connstats() -
+ *
+ * Tell the collector about session statistics.
+ * The parameter "disconnect" will be true when the backend exits.
+ * "last_report" is the last time we were called (0 if never).
+ * ----------
+ */
+static void
+pgstat_send_connstats(bool disconnect, TimestampTz last_report)
+{
+ PgStat_MsgConn msg;
+ long secs;
+ int usecs;
+
+ if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ return;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
+ msg.m_databaseid = MyDatabaseId;
+
+ /* session time since the last report */
+ TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
+ GetCurrentTimestamp(),
+ &secs, &usecs);
+ msg.m_session_time = secs * 1000000 + usecs;
+
+ msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
+
+ msg.m_active_time = pgStatActiveTime;
+ pgStatActiveTime = 0;
+
+ msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
+ pgStatTransactionIdleTime = 0;
+
+ /* report a new session only the first time */
+ msg.m_count = (last_report == 0) ? 1 : 0;
+
+ pgstat_send(&msg, sizeof(PgStat_MsgConn));
+}
+
+
/* ----------
* pgstat_reset_counters() -
*
@@ -3348,6 +3404,30 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
}
current_timestamp = GetCurrentTimestamp();
+ /*
+ * If the state has changed from "active" or "idle in transaction",
+ * calculate the duration.
+ */
+ if ((beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH ||
+ beentry->st_state == STATE_IDLEINTRANSACTION ||
+ beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) &&
+ state != beentry->st_state)
+ {
+ long secs;
+ int usecs;
+
+ TimestampDifference(beentry->st_state_start_timestamp,
+ current_timestamp,
+ &secs, &usecs);
+
+ if (beentry->st_state == STATE_RUNNING ||
+ beentry->st_state == STATE_FASTPATH)
+ pgStatActiveTime += secs * 1000000 + usecs;
+ else
+ pgStatTransactionIdleTime += secs * 1000000 + usecs;
+ }
+
/*
* Now update the status entry
*/
@@ -4919,6 +4999,10 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_replslot(&msg.msg_replslot, len);
break;
+ case PGSTAT_MTYPE_CONNECTION:
+ pgstat_recv_connstat(&msg.msg_conn, len);
+ break;
+
default:
break;
}
@@ -4993,6 +5077,13 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
dbentry->last_checksum_failure = 0;
dbentry->n_block_read_time = 0;
dbentry->n_block_write_time = 0;
+ dbentry->n_sessions = 0;
+ dbentry->n_session_time = 0;
+ dbentry->n_active_time = 0;
+ dbentry->n_idle_in_xact_time = 0;
+ dbentry->n_sessions_abandoned = 0;
+ dbentry->n_sessions_fatal = 0;
+ dbentry->n_sessions_killed = 0;
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
dbentry->stats_timestamp = 0;
@@ -6944,6 +7035,41 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
}
}
+/* ----------
+ * pgstat_recv_connstat() -
+ *
+ * Process connection information.
+ * ----------
+ */
+static void
+pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ dbentry->n_sessions += msg->m_count;
+ dbentry->n_session_time += msg->m_session_time;
+ dbentry->n_active_time += msg->m_active_time;
+ dbentry->n_idle_in_xact_time += msg->m_idle_in_xact_time;
+ switch (msg->m_disconnect)
+ {
+ case DISCONNECT_NOT_YET:
+ case DISCONNECT_NORMAL:
+ /* we don't collect these */
+ break;
+ case DISCONNECT_CLIENT_EOF:
+ dbentry->n_sessions_abandoned++;
+ break;
+ case DISCONNECT_FATAL:
+ dbentry->n_sessions_fatal++;
+ break;
+ case DISCONNECT_KILLED:
+ dbentry->n_sessions_killed++;
+ break;
+ }
+}
+
/* ----------
* pgstat_recv_tempfile() -
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2b53ebf97d..882ed96087 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2865,6 +2865,9 @@ die(SIGNAL_ARGS)
ProcDiePending = true;
}
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_KILLED;
+
/* If we're still here, waken anything waiting on the process latch */
SetLatch(MyLatch);
@@ -4578,9 +4581,15 @@ PostgresMain(int argc, char *argv[],
* means unexpected loss of frontend connection. Either way,
* perform normal shutdown.
*/
- case 'X':
case EOF:
+ /* for the statistics collector */
+ pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
+
+ /* FALLTHROUGH */
+
+ case 'X':
+
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 5c12a165a1..2bb16eabf0 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1631,6 +1631,100 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->n_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_abandoned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
Datum
pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 7790f6ab25..80c2672461 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -72,6 +72,7 @@
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
@@ -656,6 +657,13 @@ errfinish(const char *filename, int lineno, const char *funcname)
fflush(stdout);
fflush(stderr);
+ /*
+ * Let the statistics collector know. Only mark the session as
+ * terminated by fatal error if there is no other known cause.
+ */
+ if (pgStatSessionEndCause == DISCONNECT_NORMAL)
+ pgStatSessionEndCause = DISCONNECT_FATAL;
+
/*
* Do normal process-exit cleanup, then return exit code 1 to indicate
* FATAL termination. The postmaster may or may not consider this
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d7b55f57ea..f5fcc5117d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5438,6 +5438,38 @@
proname => 'pg_stat_get_db_blk_write_time', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
prosrc => 'pg_stat_get_db_blk_write_time' },
+{ oid => '9575', descr => 'statistics: session time, in milliseconds',
+ proname => 'pg_stat_get_db_session_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_session_time' },
+{ oid => '9576', descr => 'statistics: session active time, in milliseconds',
+ proname => 'pg_stat_get_db_active_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_active_time' },
+{ oid => '9577',
+ descr => 'statistics: session idle in transaction time, in milliseconds',
+ proname => 'pg_stat_get_db_idle_in_transaction_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_idle_in_transaction_time' },
+{ oid => '9578', descr => 'statistics: total number of sessions',
+ proname => 'pg_stat_get_db_sessions', provolatile => 's', proparallel => 'r',
+ prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions' },
+{ oid => '9579',
+ descr => 'statistics: number of sessions disconnected by the client closing the network connection',
+ proname => 'pg_stat_get_db_sessions_abandoned', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_abandoned' },
+{ oid => '9580',
+ descr => 'statistics: number of sessions disconnected by fatal errors',
+ proname => 'pg_stat_get_db_sessions_fatal', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_fatal' },
+{ oid => '9581',
+ descr => 'statistics: number of sessions killed by administrative action',
+ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+ prosrc => 'pg_stat_get_db_sessions_killed' },
{ oid => '3195', descr => 'statistics: information about WAL archiver',
proname => 'pg_stat_get_archiver', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c38b689710..d65fb9deca 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -41,6 +41,16 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+/* Values to track the cause of session termination */
+typedef enum SessionEndType
+{
+ DISCONNECT_NOT_YET, /* still active */
+ DISCONNECT_NORMAL,
+ DISCONNECT_CLIENT_EOF,
+ DISCONNECT_FATAL,
+ DISCONNECT_KILLED
+} SessionEndType;
+
/* ----------
* The types of backend -> collector messages
* ----------
@@ -71,6 +81,7 @@ typedef enum StatMsgType
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE,
PGSTAT_MTYPE_REPLSLOT,
+ PGSTAT_MTYPE_CONNECTION,
} StatMsgType;
/* ----------
@@ -622,6 +633,21 @@ typedef struct PgStat_MsgChecksumFailure
TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
+/* ----------
+ * PgStat_MsgConn Sent by the backend to update connection statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgConn
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid;
+ PgStat_Counter m_count;
+ PgStat_Counter m_session_time;
+ PgStat_Counter m_active_time;
+ PgStat_Counter m_idle_in_xact_time;
+ SessionEndType m_disconnect;
+} PgStat_MsgConn;
+
/* ----------
* PgStat_Msg Union over all possible messages.
@@ -654,6 +680,7 @@ typedef union PgStat_Msg
PgStat_MsgTempFile msg_tempfile;
PgStat_MsgChecksumFailure msg_checksumfailure;
PgStat_MsgReplSlot msg_replslot;
+ PgStat_MsgConn msg_conn;
} PgStat_Msg;
@@ -696,6 +723,13 @@ typedef struct PgStat_StatDBEntry
TimestampTz last_checksum_failure;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
+ PgStat_Counter n_sessions;
+ PgStat_Counter n_session_time;
+ PgStat_Counter n_active_time;
+ PgStat_Counter n_idle_in_xact_time;
+ PgStat_Counter n_sessions_abandoned;
+ PgStat_Counter n_sessions_fatal;
+ PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
@@ -1354,6 +1388,11 @@ extern PgStat_MsgWal WalStats;
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
+/*
+ * Updated by the traffic cop and in errfinish()
+ */
+extern SessionEndType pgStatSessionEndCause;
+
/* ----------
* Functions called from postmaster
* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a687e99d1e..6173473de9 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1848,6 +1848,13 @@ pg_stat_database| SELECT d.oid AS datid,
pg_stat_get_db_checksum_last_failure(d.oid) AS checksum_last_failure,
pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time,
pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time,
+ pg_stat_get_db_session_time(d.oid) AS session_time,
+ pg_stat_get_db_active_time(d.oid) AS active_time,
+ pg_stat_get_db_idle_in_transaction_time(d.oid) AS idle_in_transaction_time,
+ pg_stat_get_db_sessions(d.oid) AS sessions,
+ pg_stat_get_db_sessions_abandoned(d.oid) AS sessions_abandoned,
+ pg_stat_get_db_sessions_fatal(d.oid) AS sessions_fatal,
+ pg_stat_get_db_sessions_killed(d.oid) AS sessions_killed,
pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset
FROM ( SELECT 0 AS oid,
NULL::name AS datname
--
2.26.2
On 2021-01-08 18:34, Laurenz Albe wrote:
On Fri, 2021-01-08 at 12:00 +0900, Masahiro Ikeda wrote:
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function
and
"idle in transaction" includes "idle in transaction(aborted)" time.
Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.Good idea; I have expanded the documentation like that.
BTW, is there any reason to merge the above statistics?
IIUC, to separate statistics' cons is that two columns increase, and
there is no performance penalty. So, I wonder that there is a way to
separate them
corresponding to the state column of pg_stat_activity.Sure, that could be done.
I decided to do it like this because I thought that few people would
be interested in "time spend doing fast-path function calls"; my guess
was that the more interesting value is "time where the database was
busy calculating results".I tried to keep the balance between providing reasonable detail
while not creating more additional columns to "pg_stat_database"
than necessary.This is of course a matter of taste, and it is good to hear different
opinions. If more people share your opinion, I'll change the code.
OK, I understood.
I don't have any strong opinions to add them.
There are some following codes in pgstatfuncs.c.
int64 result = 0.0;But, I think the following is better.
int64 result = 0;You are right. That was a silly copy-and-paste error. Fixed.
Thanks.
Although now pg_stat_get_db_session_time is initialize "result" to
zero
when it is declared,
another pg_stat_XXX function didn't initialize. Is it better to change
it?I looked at other similar functions, and the ones I saw returned
NULL if there were no data. In that case, it makes sense to writechar *result;
if ((result = get_stats_data()) == NULL)
PG_RETURN_NULL();PG_RETURN_TEXT_P(cstring_to_text(result));
But I want to return 0 for the session time if there are no data yet,
so I think initializing the result to 0 in the declaration makes sense.There are some functions that do it like this:
int32 result;
result = 0;
for (...)
{
if (...)
result++;
}PG_RETURN_INT32(result);
Again, it is a matter of taste, and I didn't detect a clear pattern
in the existing code that I feel I should follow in this question.
Thanks, I understood.
I checked my comments are fixed.
This patch looks good to me for monitoring session statistics.
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
On Fri, Jan 8, 2021 at 10:34 AM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
On Fri, 2021-01-08 at 12:00 +0900, Masahiro Ikeda wrote:
2. monitoring.sgml
IIUC, "active_time" includes the time executes a fast-path function
and
"idle in transaction" includes "idle in transaction(aborted)" time.
Why don't you reference pg_stat_activity's "state" column and
"active_time" is the total time when the state is "active" and "fast
path"?
"idle in transaction" is as same too.Good idea; I have expanded the documentation like that.
BTW, is there any reason to merge the above statistics?
IIUC, to separate statistics' cons is that two columns increase, and
there is no performance penalty. So, I wonder that there is a way to
separate them
corresponding to the state column of pg_stat_activity.Sure, that could be done.
I decided to do it like this because I thought that few people would
be interested in "time spend doing fast-path function calls"; my guess
was that the more interesting value is "time where the database was
busy calculating results".I tried to keep the balance between providing reasonable detail
while not creating more additional columns to "pg_stat_database"
than necessary.This is of course a matter of taste, and it is good to hear different
opinions. If more people share your opinion, I'll change the code.There are some following codes in pgstatfuncs.c.
int64 result = 0.0;But, I think the following is better.
int64 result = 0;You are right. That was a silly copy-and-paste error. Fixed.
Although now pg_stat_get_db_session_time is initialize "result" to zero
when it is declared,
another pg_stat_XXX function didn't initialize. Is it better to change
it?I looked at other similar functions, and the ones I saw returned
NULL if there were no data. In that case, it makes sense to writechar *result;
if ((result = get_stats_data()) == NULL)
PG_RETURN_NULL();PG_RETURN_TEXT_P(cstring_to_text(result));
But I want to return 0 for the session time if there are no data yet,
so I think initializing the result to 0 in the declaration makes sense.There are some functions that do it like this:
int32 result;
result = 0;
for (...)
{
if (...)
result++;
}PG_RETURN_INT32(result);
Again, it is a matter of taste, and I didn't detect a clear pattern
in the existing code that I feel I should follow in this question.Version 12 of the patch is attached.
Thanks! I have applied this version, with some minor changes:
* I renamed the n_<x>_time members in the struct to just
total_<x>_time. The n_ indicates "number of" and is thus wrong for
time parameters.
* Some very minor wording changes.
* catversion bump (for once I didn't forget it!)
--
Magnus Hagander
Me: https://www.hagander.net/
Work: https://www.redpill-linpro.com/
On Sun, 2021-01-17 at 14:07 +0100, Magnus Hagander wrote:
I have applied this version, with some minor changes:
* I renamed the n_<x>_time members in the struct to just
total_<x>_time. The n_ indicates "number of" and is thus wrong for
time parameters.
Right.
* Some very minor wording changes.
* catversion bump (for once I didn't forget it!)
Thank you!
You included the catversion bump, but shouldn't PGSTAT_FILE_FORMAT_ID
in "include/pgstat.h" be updated as well?
Yours,
Laurenz Albe
On Mon, Jan 18, 2021 at 5:11 PM Laurenz Albe <laurenz.albe@cybertec.at> wrote:
On Sun, 2021-01-17 at 14:07 +0100, Magnus Hagander wrote:
I have applied this version, with some minor changes:
* I renamed the n_<x>_time members in the struct to just
total_<x>_time. The n_ indicates "number of" and is thus wrong for
time parameters.Right.
* Some very minor wording changes.
* catversion bump (for once I didn't forget it!)
Thank you!
You included the catversion bump, but shouldn't PGSTAT_FILE_FORMAT_ID
in "include/pgstat.h" be updated as well?
Yup, you are absolutely correct. Will fix.
--
Magnus Hagander
Me: https://www.hagander.net/
Work: https://www.redpill-linpro.com/