From 9e3ac989d8583c6ba5e74945602aeaacfaee127f Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Mon, 7 Mar 2016 14:38:34 +0900
Subject: [PATCH 2/3] WIP: Implement progress reporting for VACUUM command.

This basically utilizes the pgstat_progress* API to report a handful of
paramters to indicate its progress.  lazy_vacuum_rel() and lazy_scan_heap()
have been altered to report command start, command target table, and the
following parameters: processing phase, number of heap blocks, number of
index blocks (all indexes), current heap block number in the main scan loop
(whenever changes), index blocks vacuumed (once per finished index vacuum),
and number of index vacuum passes (every time when all indexes are vacuumed).
Following processing phases are identified and reported whenever one changes
to another: 'scanning heap', 'vacuuming indexes', 'vacuuming heap', and
'cleanup'.

TODO: find a way to report index pages vacuumed in a more granular manner than
the current report per index vacuumed.

A view named pg_stat_progress_vacuum has been added that shows these values.
---
 doc/src/sgml/monitoring.sgml         |  111 ++++++++++++++++++++++++++++++++++
 src/backend/catalog/system_views.sql |   25 ++++++++
 src/backend/commands/vacuumlazy.c    |   73 ++++++++++++++++++++++-
 src/test/regress/expected/rules.out  |   23 +++++++
 4 files changed, 231 insertions(+), 1 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 85459d0..6b26ba9 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -507,6 +507,12 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       yet included in <structname>pg_stat_user_functions</>).</entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_progress_vacuum</><indexterm><primary>pg_stat_progress_vacuum</primary></indexterm></entry>
+      <entry>One row for each backend (including autovacuum worker processes) running
+      <command>VACUUM</>, showing current progress in terms of heap pages it
+      has finished processing.</entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -1822,6 +1828,111 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
    controls exactly which functions are tracked.
   </para>
 
+  <table id="pg-stat-progress-vacuum" xreflabel="pg_stat_progress_vacuum">
+   <title><structname>pg_stat_progress_vacuum</structname> View</title>
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+   <tbody>
+    <row>
+     <entry><structfield>pid</></entry>
+     <entry><type>integer</></entry>
+     <entry>Process ID of backend</entry>
+    </row>
+    <row>
+     <entry><structfield>datid</></entry>
+     <entry><type>oid</></entry>
+     <entry>OID of the database this backend is connected to</entry>
+    </row>
+    <row>
+     <entry><structfield>datname</></entry>
+     <entry><type>name</></entry>
+     <entry>Name of the database this backend is connected to</entry>
+    </row>
+    <row>
+     <entry><structfield>relid</></entry>
+     <entry><type>oid</></entry>
+     <entry>OID of the table being vacuumed</entry>
+    </row>
+    <row>
+     <entry><structfield>processing_phase</></entry>
+     <entry><type>text</></entry>
+     <entry>Current processing phase of vacuum.
+       Possible values are:
+       <itemizedlist>
+        <listitem>
+         <para>
+          <literal>scanning heap</>
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>vacuuming indexes</>
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>vacuuming heap</>
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>cleanup</>
+         </para>
+        </listitem>
+       </itemizedlist>
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>total_heap_blocks</></entry>
+     <entry><type>bigint</></entry>
+     <entry>Total number of heap blocks in the table</entry>
+    </row>
+    <row>
+     <entry><structfield>current_heap_block</></entry>
+     <entry><type>bigint</></entry>
+     <entry>Current heap block being processed</entry>
+    </row>
+    <row>
+     <entry><structfield>total_index_blocks</></entry>
+     <entry><type>bigint</></entry>
+     <entry>Total number of index blocks to be processed</entry>
+    </row>
+    <row>
+     <entry><structfield>index_blocks_done</></entry>
+     <entry><type>bigint</></entry>
+     <entry>Number of index blocks processed in current vacuum round</entry>
+    </row>
+    <row>
+     <entry><structfield>index_vacuum_count</></entry>
+     <entry><type>bigint</></entry>
+     <entry>Number of times index vacuum round has been performed so far</entry>
+    </row>
+    <row>
+     <entry><structfield>percent_done</></entry>
+     <entry><type>numeric</></entry>
+     <entry>
+      Amount of work finished in percent in terms of table blocks processed
+     </entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_progress_vacuum</structname> view will contain
+   one row for each backend (including autovacuum worker processes), showing
+   parameters that can help determine the progress of <command>VACUUM</command>
+   command running in it. Note that the backends running
+   <command>VACUUM FULL</command> are not shown.
+  </para>
+
  </sect2>
 
  <sect2 id="monitoring-stats-functions">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index abf9a70..5d9caf7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -971,3 +971,28 @@ RETURNS jsonb
 LANGUAGE INTERNAL
 STRICT IMMUTABLE
 AS 'jsonb_set';
+
+CREATE VIEW pg_stat_progress_vacuum AS
+    SELECT
+            S.pid AS pid,
+            S.datid AS datid,
+            D.datname AS datname,
+            S.relid AS relid,
+            CASE S.param1
+                WHEN 1 THEN 'scanning heap'
+                WHEN 2 THEN 'vacuuming indexes'
+                WHEN 3 THEN 'vacuuming heap'
+                WHEN 4 THEN 'cleanup'
+                ELSE 'unknown phase'
+            END AS processing_phase,
+            S.param2 AS total_heap_blocks,
+            S.param3 AS current_heap_block,
+            S.param4 AS total_index_blocks,
+            S.param5 AS index_blocks_done,
+            S.param6 AS index_vacuum_count,
+            CASE S.param2
+                WHEN 0 THEN round(100.0, 2)
+			    ELSE round((S.param3 + 1) * 100.0 / S.param2, 2)
+            END AS percent_done
+    FROM pg_database D, pg_stat_get_progress_info('VACUUM') AS S
+    WHERE S.datid = D.oid;
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 61d2edd..0771b91 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -97,6 +97,29 @@
  */
 #define SKIP_PAGES_THRESHOLD	((BlockNumber) 32)
 
+/*
+ * Progress parameters of (lazy) vacuum reported to pgstat progress tracking
+ * facility
+ */
+#define PROG_PARAM_VAC_PHASE			0
+#define PROG_PARAM_VAC_HEAP_BLKS		1
+#define PROG_PARAM_VAC_CURR_HEAP_BLK	2
+#define PROG_PARAM_VAC_IDX_BLKS			3
+#define PROG_PARAM_VAC_IDX_BLKS_DONE	4
+#define PROG_PARAM_VAC_IDX_VAC_COUNT	5
+
+/*
+ * Following distinct phases of lazy vacuum are identified.  #1, #2 and #3
+ * run in a cyclical manner due to possibly limited memory to work with,
+ * whereby #1 is periodically interrupted to run #2, followed by #3, and
+ * back to #1.  Cycle repeats until all blocks of the relation have been
+ * covered by #1.
+ */
+#define LV_PHASE_SCAN_HEAP			1
+#define LV_PHASE_VACUUM_INDEX		2
+#define LV_PHASE_VACUUM_HEAP		3
+#define LV_PHASE_CLEANUP			4
+
 typedef struct LVRelStats
 {
 	/* hasindex = true means two-pass strategy; false means one-pass */
@@ -437,7 +460,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			   Relation *Irel, int nindexes, bool scan_all)
 {
 	BlockNumber nblocks,
-				blkno;
+				blkno,
+				total_index_blks,
+			   *current_index_blks;
 	HeapTupleData tuple;
 	char	   *relname;
 	BlockNumber empty_pages,
@@ -478,6 +503,24 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 	lazy_space_alloc(vacrelstats, nblocks);
 	frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
 
+	/* We're about to begin heap scan. */
+	pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_SCAN_HEAP);
+
+	/* total_heap_blks */
+	pgstat_progress_update_param(PROG_PARAM_VAC_HEAP_BLKS, nblocks);
+
+	/* total_index_blks */
+	current_index_blks = (BlockNumber *) palloc(nindexes * sizeof(BlockNumber));
+	total_index_blks = 0;
+	for (i = 0; i < nindexes; i++)
+	{
+		BlockNumber		nblocks = RelationGetNumberOfBlocks(Irel[i]);
+
+		current_index_blks[i] = nblocks;
+		total_index_blks += nblocks;
+	}
+	pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS, total_index_blks);
+
 	/*
 	 * We want to skip pages that don't require vacuuming according to the
 	 * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
@@ -585,6 +628,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 
 		vacuum_delay_point();
 
+		/* current_heap_blkno: 0..nblocks-1 */
+		pgstat_progress_update_param(PROG_PARAM_VAC_CURR_HEAP_BLK, blkno);
+
 		/*
 		 * If we are close to overrunning the available space for dead-tuple
 		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
@@ -608,11 +654,22 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			vacuum_log_cleanup_info(onerel, vacrelstats);
 
 			/* Remove index entries */
+			pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_INDEX);
 			for (i = 0; i < nindexes; i++)
+			{
 				lazy_vacuum_index(Irel[i],
 								  &indstats[i],
 								  vacrelstats);
+
+				pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS_DONE,
+											 current_index_blks[i]);
+			}
+
+			pgstat_progress_update_param(PROG_PARAM_VAC_IDX_VAC_COUNT,
+										 vacrelstats->num_index_scans+1);
+
 			/* Remove tuples from heap */
+			pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_HEAP);
 			lazy_vacuum_heap(onerel, vacrelstats);
 
 			/*
@@ -622,6 +679,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			 */
 			vacrelstats->num_dead_tuples = 0;
 			vacrelstats->num_index_scans++;
+
+			/* Going back to scanning the heap */
+			pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_SCAN_HEAP);
 		}
 
 		/*
@@ -1151,16 +1211,27 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		vacuum_log_cleanup_info(onerel, vacrelstats);
 
 		/* Remove index entries */
+		pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_INDEX);
 		for (i = 0; i < nindexes; i++)
+		{
 			lazy_vacuum_index(Irel[i],
 							  &indstats[i],
 							  vacrelstats);
+
+			pgstat_progress_update_param(PROG_PARAM_VAC_IDX_BLKS_DONE,
+										 current_index_blks[i]);
+		}
+		pgstat_progress_update_param(PROG_PARAM_VAC_IDX_VAC_COUNT,
+									 vacrelstats->num_index_scans + 1);
+
 		/* Remove tuples from heap */
+		pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_VACUUM_HEAP);
 		lazy_vacuum_heap(onerel, vacrelstats);
 		vacrelstats->num_index_scans++;
 	}
 
 	/* Do post-vacuum cleanup and statistics update for each index */
+	pgstat_progress_update_param(PROG_PARAM_VAC_PHASE, LV_PHASE_CLEANUP);
 	for (i = 0; i < nindexes; i++)
 		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 81bc5c9..97d4e12 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1746,6 +1746,29 @@ pg_stat_database_conflicts| SELECT d.oid AS datid,
     pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin,
     pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock
    FROM pg_database d;
+pg_stat_progress_vacuum| SELECT s.pid,
+    s.datid,
+    d.datname,
+    s.relid,
+        CASE s.param1
+            WHEN 1 THEN 'scanning heap'::text
+            WHEN 2 THEN 'vacuuming indexes'::text
+            WHEN 3 THEN 'vacuuming heap'::text
+            WHEN 4 THEN 'cleanup'::text
+            ELSE 'unknown phase'::text
+        END AS processing_phase,
+    s.param2 AS total_heap_blocks,
+    s.param3 AS current_heap_block,
+    s.param4 AS total_index_blocks,
+    s.param5 AS index_blocks_done,
+    s.param6 AS index_vacuum_count,
+        CASE s.param2
+            WHEN 0 THEN round(100.0, 2)
+            ELSE round(((((s.param3 + 1))::numeric * 100.0) / (s.param2)::numeric), 2)
+        END AS percent_done
+   FROM pg_database d,
+    pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10)
+  WHERE (s.datid = d.oid);
 pg_stat_replication| SELECT s.pid,
     s.usesysid,
     u.rolname AS usename,
-- 
1.7.1

