Add statistics refresh materialized view

Started by Seino Yukiover 4 years ago5 messages
#1Seino Yuki
seinoyu@oss.nttdata.com
1 attachment(s)

Hi.

This is a proposal for a new feature in statistics collector.
I think we need to add statistics about refresh matview to
pg_stat_all_tables view.

When the "REFRESH MATERIALIZED VIEW" was executed, the number of times
it was executed
and date it took were not recorded anywhere.

"pg_stat_statements" can be used to get the number of executions and the
date and time of execution,
but this information is statement-based, not view-based.
Also, that method requires the high cost of "pg_stat_statements".

This patch will add statistics(count, last time) about "REFRESH
MATERIALIZED VIEW"
to pg_stat_all_tables(pg_stat_user_tables, [pg_stat_sys_tables]).

What do you think?

Regards,
Seino Yuki

Attachments:

add_statistics_refresh_matview_v1.patchtext/x-diff; name=add_statistics_refresh_matview_v1.patchDownload
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 643e1ad49f..884585af0c 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -4285,6 +4285,26 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
        daemon
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>last_refresh_matview_time</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Last time at which this materialized view was refreshed
+       (If this record is materialized view)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>refresh_matview_count</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of times this materialized view has been refreshed
+       (If this record is materialized view)
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 999d984068..70375c7e19 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -656,7 +656,9 @@ CREATE VIEW pg_stat_all_tables AS
             pg_stat_get_vacuum_count(C.oid) AS vacuum_count,
             pg_stat_get_autovacuum_count(C.oid) AS autovacuum_count,
             pg_stat_get_analyze_count(C.oid) AS analyze_count,
-            pg_stat_get_autoanalyze_count(C.oid) AS autoanalyze_count
+            pg_stat_get_autoanalyze_count(C.oid) AS autoanalyze_count,
+            pg_stat_get_last_refresh_matview_time(C.oid) AS last_refresh_matview_time,
+            pg_stat_get_refresh_matview_count(C.oid) AS refresh_matview_count
     FROM pg_class C LEFT JOIN
          pg_index I ON C.oid = I.indrelid
          LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index 25bbd8a5c1..20733b04de 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -356,6 +356,9 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString,
 
 	ObjectAddressSet(address, RelationRelationId, matviewOid);
 
+	/* Report results to the stats collector */
+	pgstat_report_refresh_matview(matviewOid);
+
 	/*
 	 * Save the rowcount so that pg_stat_statements can track the total number
 	 * of rows processed by REFRESH MATERIALIZED VIEW command. Note that we
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 11702f2a80..6a378831a9 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -358,6 +358,7 @@ static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len
 static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
 static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
+static void pgstat_recv_refresh_matview(PgStat_MsgRefreshMatview *msg, int len);
 
 /* ------------------------------------------------------------
  * Public functions called from postmaster follow
@@ -1863,6 +1864,28 @@ pgstat_report_replslot_drop(const char *slotname)
 	pgstat_send(&msg, sizeof(PgStat_MsgReplSlot));
 }
 
+/* ---------
+ * pgstat_report_refresh_matview() -
+ *
+ *	Tell the collector about the matview we just refreshed.
+ * ---------
+ */
+void
+pgstat_report_refresh_matview(Oid tableoid)
+{
+	PgStat_MsgRefreshMatview msg;
+
+	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+		return;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_REFRESHEDMATVIEW);
+	msg.m_databaseid = MyDatabaseId;
+	msg.m_tableoid = tableoid;
+	msg.refreshmatview_time = GetCurrentTimestamp();
+	pgstat_send(&msg, sizeof(msg));
+}
+
+
 /* ----------
  * pgstat_ping() -
  *
@@ -3424,6 +3447,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_connstat(&msg.msg_conn, len);
 					break;
 
+				case PGSTAT_MTYPE_REFRESHEDMATVIEW:
+					pgstat_recv_refresh_matview(&msg.msg_refreshmatview, len);
+					break;
+
 				default:
 					break;
 			}
@@ -3600,6 +3627,8 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
 		result->analyze_count = 0;
 		result->autovac_analyze_timestamp = 0;
 		result->autovac_analyze_count = 0;
+		result->matview_refresh_timestamp = 0;
+		result->matview_refresh_count = 0;
 	}
 
 	return result;
@@ -4883,6 +4912,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 			tabentry->analyze_count = 0;
 			tabentry->autovac_analyze_timestamp = 0;
 			tabentry->autovac_analyze_count = 0;
+			tabentry->matview_refresh_timestamp = 0;
+			tabentry->matview_refresh_count = 0;
 		}
 		else
 		{
@@ -5650,6 +5681,28 @@ pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
 	}
 }
 
+/* ----------
+ * pgstat_recv_refresh_matview() -
+ *
+ *	Process a REFRESH MATERIALIZED VIEW message.
+ * ----------
+ */
+static void
+pgstat_recv_refresh_matview(PgStat_MsgRefreshMatview *msg, int len)
+{
+	PgStat_StatDBEntry *dbentry;
+	PgStat_StatTabEntry *tabentry;
+
+	/*
+	 * Store the data in the table's hashtable entry.
+	 */
+	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
+	tabentry->matview_refresh_timestamp = msg->refreshmatview_time;
+	tabentry->matview_refresh_count++;
+}
+
+
 /* ----------
  * pgstat_write_statsfile_needed() -
  *
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index f0e09eae4d..cfd7f6715c 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -378,6 +378,39 @@ pg_stat_get_autoanalyze_count(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(result);
 }
 
+Datum
+pg_stat_get_last_refresh_matview_time(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	TimestampTz result;
+	PgStat_StatTabEntry *tabentry;
+
+	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+		result = 0;
+	else
+		result = tabentry->matview_refresh_timestamp;
+
+	if (result == 0)
+		PG_RETURN_NULL();
+	else
+		PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_refresh_matview_count(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	int64		result;
+	PgStat_StatTabEntry *tabentry;
+
+	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+		result = 0;
+	else
+		result = (int64) (tabentry->matview_refresh_count);
+
+	PG_RETURN_INT64(result);
+}
+
 Datum
 pg_stat_get_function_calls(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fde251fa4f..bdf89a3f52 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5271,6 +5271,14 @@
   proname => 'pg_stat_get_autoanalyze_count', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
   prosrc => 'pg_stat_get_autoanalyze_count' },
+{ oid => '6205', descr => 'statistics: number of manual vacuums for a table',
+  proname => 'pg_stat_get_last_refresh_matview_time', provolatile => 's', proparallel => 'r',
+  prorettype => 'timestamptz', proargtypes => 'oid',
+  prosrc => 'pg_stat_get_last_refresh_matview_time' },
+{ oid => '6206', descr => 'statistics: number of manual vacuums for a table',
+  proname => 'pg_stat_get_refresh_matview_count', provolatile => 's', proparallel => 'r',
+  prorettype => 'int8', proargtypes => 'oid',
+  prosrc => 'pg_stat_get_refresh_matview_count' },
 { oid => '1936', descr => 'statistics: currently active backend IDs',
   proname => 'pg_stat_get_backend_idset', prorows => '100', proretset => 't',
   provolatile => 's', proparallel => 'r', prorettype => 'int4',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 9612c0a6c2..1c3abce875 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -82,6 +82,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_CHECKSUMFAILURE,
 	PGSTAT_MTYPE_REPLSLOT,
 	PGSTAT_MTYPE_CONNECTION,
+	PGSTAT_MTYPE_REFRESHEDMATVIEW
 } StatMsgType;
 
 /* ----------
@@ -676,6 +677,18 @@ typedef struct PgStat_MsgConn
 	SessionEndType m_disconnect;
 } PgStat_MsgConn;
 
+/* ----------
+ * PgStat_MsgRefreshMatview			Sent by the backend to update statistics.
+									after REFRESH MATERIALIZED VIEW.
+ * ----------
+ */
+typedef struct PgStat_MsgRefreshMatview
+{
+	PgStat_MsgHdr m_hdr;
+	Oid			m_databaseid;
+	Oid			m_tableoid;
+	TimestampTz refreshmatview_time;
+} PgStat_MsgRefreshMatview;
 
 /* ----------
  * PgStat_Msg					Union over all possible messages.
@@ -710,6 +723,7 @@ typedef union PgStat_Msg
 	PgStat_MsgChecksumFailure msg_checksumfailure;
 	PgStat_MsgReplSlot msg_replslot;
 	PgStat_MsgConn msg_conn;
+	PgStat_MsgRefreshMatview msg_refreshmatview;
 } PgStat_Msg;
 
 
@@ -807,6 +821,9 @@ typedef struct PgStat_StatTabEntry
 	PgStat_Counter analyze_count;
 	TimestampTz autovac_analyze_timestamp;	/* autovacuum initiated */
 	PgStat_Counter autovac_analyze_count;
+
+	TimestampTz matview_refresh_timestamp;	/* refresh matview executed */
+	PgStat_Counter matview_refresh_count;
 } PgStat_StatTabEntry;
 
 
@@ -1011,7 +1028,7 @@ extern void pgstat_report_checksum_failure(void);
 extern void pgstat_report_replslot(const PgStat_StatReplSlotEntry *repSlotStat);
 extern void pgstat_report_replslot_create(const char *slotname);
 extern void pgstat_report_replslot_drop(const char *slotname);
-
+extern void pgstat_report_refresh_matview(Oid tableoid);
 extern void pgstat_initialize(void);
 
 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e5ab11275d..9471afb267 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1803,7 +1803,9 @@ pg_stat_all_tables| SELECT c.oid AS relid,
     pg_stat_get_vacuum_count(c.oid) AS vacuum_count,
     pg_stat_get_autovacuum_count(c.oid) AS autovacuum_count,
     pg_stat_get_analyze_count(c.oid) AS analyze_count,
-    pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count
+    pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count,
+    pg_stat_get_last_refresh_matview_time(c.oid) AS last_refresh_matview_time,
+    pg_stat_get_refresh_matview_count(c.oid) AS refresh_matview_count
    FROM ((pg_class c
      LEFT JOIN pg_index i ON ((c.oid = i.indrelid)))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
@@ -2126,7 +2128,9 @@ pg_stat_sys_tables| SELECT pg_stat_all_tables.relid,
     pg_stat_all_tables.vacuum_count,
     pg_stat_all_tables.autovacuum_count,
     pg_stat_all_tables.analyze_count,
-    pg_stat_all_tables.autoanalyze_count
+    pg_stat_all_tables.autoanalyze_count,
+    pg_stat_all_tables.last_refresh_matview_time,
+    pg_stat_all_tables.refresh_matview_count
    FROM pg_stat_all_tables
   WHERE ((pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_tables.schemaname ~ '^pg_toast'::text));
 pg_stat_user_functions| SELECT p.oid AS funcid,
@@ -2170,7 +2174,9 @@ pg_stat_user_tables| SELECT pg_stat_all_tables.relid,
     pg_stat_all_tables.vacuum_count,
     pg_stat_all_tables.autovacuum_count,
     pg_stat_all_tables.analyze_count,
-    pg_stat_all_tables.autoanalyze_count
+    pg_stat_all_tables.autoanalyze_count,
+    pg_stat_all_tables.last_refresh_matview_time,
+    pg_stat_all_tables.refresh_matview_count
    FROM pg_stat_all_tables
   WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
 pg_stat_wal| SELECT w.wal_records,
#2Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Seino Yuki (#1)
Re: Add statistics refresh materialized view

On 2021/07/09 1:39, Seino Yuki wrote:

Hi.

This is a proposal for a new feature in statistics collector.
I think we need to add statistics about refresh matview to pg_stat_all_tables view.

Why do you want to treat only REFRESH MATERIALIZED VIEW command special?
What about other utility commands like TRUNCATE, CLUSTER, etc?

It's not good design to add new columns per utility command into
pg_stat_all_tables. Otherwise pg_stat_all_tables will have to have lots of
columns to expose the stats of many utility commands at last. Which is
ugly and very user-unfriendly.

Most entries in pg_stat_all_tables are basically for tables. So the columns
about REFRESH MATERIALIZED VIEW are useless for those most entries.
This is another reason why I think the design is not good.

When the "REFRESH MATERIALIZED VIEW" was executed, the number of times it was executed
and date it took were not recorded anywhere.

pg_stat_statements and log_statement would help?

"pg_stat_statements" can be used to get the number of executions and the date and time of execution,
but this information is statement-based, not view-based.

pg_stat_statements reports different records for REFRESH MATERIALIZED VIEW
commands on different views. So ISTM that we can aggregate the information
per view, from pg_stat_statements. No?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#3Seino Yuki
seinoyu@oss.nttdata.com
In reply to: Fujii Masao (#2)
Re: Add statistics refresh materialized view

On 2021-09-01 23:15, Fujii Masao wrote:

Why do you want to treat only REFRESH MATERIALIZED VIEW command
special?
What about other utility commands like TRUNCATE, CLUSTER, etc?

First of all, knowing the update date and time of the MATVIEW is
essential for actual operation.
Without that information, users will not be able to trust the MATVIEW.

In terms of the reliability of the information in the table,
I think the priority of the REFRESHED MATVIEW is higher than that of
TRUNCATE and CLUSTER.

It's not good design to add new columns per utility command into
pg_stat_all_tables. Otherwise pg_stat_all_tables will have to have lots
of
columns to expose the stats of many utility commands at last. Which is
ugly and very user-unfriendly.

Most entries in pg_stat_all_tables are basically for tables. So the
columns
about REFRESH MATERIALIZED VIEW are useless for those most entries.
This is another reason why I think the design is not good.

I agree with this opinion.
Initially, I thought about storing this information in pg_matviews,
but decided against it because of the overhead of adding it to the
system catalog.

pg_stat_statements reports different records for REFRESH MATERIALIZED
VIEW
commands on different views. So ISTM that we can aggregate the
information
per view, from pg_stat_statements. No?

I made this suggestion based on the premise that the last update date
and time of the Mateview should always be retained.
I think the same concept applies to Oracle Database.
https://docs.oracle.com/cd/F19136_01/refrn/ALL_MVIEWS.html#GUID-8B9432B5-6B66-411A-936E-590D9D7671E9
I thought it would be useless to enable pg_stat_statements and
log_statement to see this information.

However, as you said, for most use cases, pg_stat_statements and
log_statement may be sufficient.
I would like to withdraw this proposal.

Regards,

#4Michael Paquier
michael@paquier.xyz
In reply to: Seino Yuki (#3)
Re: Add statistics refresh materialized view

On Tue, Sep 07, 2021 at 06:11:14PM +0900, Seino Yuki wrote:

I would like to withdraw this proposal.

This was registered in the CF, so marked as RwF.
--
Michael

#5Said Assemlal
sassemlal@neurorx.com
In reply to: Seino Yuki (#3)
Re: Add statistics refresh materialized view

Hi,

However, as you said, for most use cases, pg_stat_statements and
log_statement may be sufficient.
I would like to withdraw this proposal.

Well, they either require extensions or parameters to be set properly.
One advantage I see to store those kind of information is that it can be
queried by application developers (users are reporting old data for
example).

We currently have to rely on other ways to figure out if materialized
views were properly refreshed.