From e12a1aff3f1b423da5277cccf2a76ec09318567a Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@otacoo.com>
Date: Tue, 18 Nov 2014 16:30:23 +0900
Subject: [PATCH] Track number of files marked as ready for archiving in
 pg_stat_archiver

This number of files is directly tracked by the archiver process that then
reports the number it finds to the stat machinery. Note that when archiver
marks a file as successfully archived, it decrements by one the number of
files waiting to be archived, giving more precise information to the user.
---
 doc/src/sgml/monitoring.sgml         |  5 +++++
 src/backend/catalog/system_views.sql |  1 +
 src/backend/postmaster/pgarch.c      | 33 +++++++++++++++++++--------------
 src/backend/postmaster/pgstat.c      |  6 +++++-
 src/backend/utils/adt/pgstatfuncs.c  | 21 +++++++++++++++------
 src/include/catalog/pg_proc.h        |  2 +-
 src/include/pgstat.h                 |  5 ++++-
 src/test/regress/expected/rules.out  |  3 ++-
 8 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b29e5e6..4f4ac73 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -870,6 +870,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Time of the last failed archival operation</entry>
      </row>
      <row>
+      <entry><structfield>ready_count</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of files waiting to be archived</entry>
+     </row>
+     <row>
       <entry><structfield>stats_reset</></entry>
       <entry><type>timestamp with time zone</type></entry>
       <entry>Time at which these statistics were last reset</entry>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index a819952..195769c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -729,6 +729,7 @@ CREATE VIEW pg_stat_archiver AS
         s.failed_count,
         s.last_failed_wal,
         s.last_failed_time,
+        s.ready_count,
         s.stats_reset
     FROM pg_stat_get_archiver() s;
 
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 6a5c5b0..7f5b813 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -100,7 +100,7 @@ static void pgarch_waken_stop(SIGNAL_ARGS);
 static void pgarch_MainLoop(void);
 static void pgarch_ArchiverCopyLoop(void);
 static bool pgarch_archiveXlog(char *xlog);
-static bool pgarch_readyXlog(char *xlog);
+static int64 pgarch_readyXlog(char *xlog);
 static void pgarch_archiveDone(char *xlog);
 
 
@@ -440,6 +440,7 @@ static void
 pgarch_ArchiverCopyLoop(void)
 {
 	char		xlog[MAX_XFN_CHARS + 1];
+	int64		ready_count;
 
 	/*
 	 * loop through all xlogs with archive_status of .ready and archive
@@ -447,7 +448,7 @@ pgarch_ArchiverCopyLoop(void)
 	 * some backend will add files onto the list of those that need archiving
 	 * while we are still copying earlier archives
 	 */
-	while (pgarch_readyXlog(xlog))
+	while ((ready_count = pgarch_readyXlog(xlog)) != 0)
 	{
 		int			failures = 0;
 
@@ -488,11 +489,16 @@ pgarch_ArchiverCopyLoop(void)
 				pgarch_archiveDone(xlog);
 
 				/*
+				 * File has been archived, reducing by one the entries waiting
+				 * to be archived.
+				 */
+				ready_count--;
+
+				/*
 				 * Tell the collector about the WAL file that we successfully
 				 * archived
 				 */
-				pgstat_send_archiver(xlog, false);
-
+				pgstat_send_archiver(xlog, ready_count, false);
 				break;			/* out of inner retry loop */
 			}
 			else
@@ -501,7 +507,7 @@ pgarch_ArchiverCopyLoop(void)
 				 * Tell the collector about the WAL file that we failed to
 				 * archive
 				 */
-				pgstat_send_archiver(xlog, true);
+				pgstat_send_archiver(xlog, ready_count, true);
 
 				if (++failures >= NUM_ARCHIVE_RETRIES)
 				{
@@ -668,7 +674,8 @@ pgarch_archiveXlog(char *xlog)
  * No notification is set that file archiving is now in progress, so
  * this would need to be extended if multiple concurrent archival
  * tasks were created. If a failure occurs, we will completely
- * re-copy the file at the next available opportunity.
+ * re-copy the file at the next available opportunity. This function
+ * returns the number of files counted as in ready state.
  *
  * It is important that we return the oldest, so that we archive xlogs
  * in order that they were written, for two reasons:
@@ -682,7 +689,7 @@ pgarch_archiveXlog(char *xlog)
  * higher priority for archiving.  This seems okay, or at least not
  * obviously worth changing.
  */
-static bool
+static int64
 pgarch_readyXlog(char *xlog)
 {
 	/*
@@ -695,7 +702,7 @@ pgarch_readyXlog(char *xlog)
 	char		newxlog[MAX_XFN_CHARS + 6 + 1];
 	DIR		   *rldir;
 	struct dirent *rlde;
-	bool		found = false;
+	int64		ready_count = 0;
 
 	snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
 	rldir = AllocateDir(XLogArchiveStatusDir);
@@ -714,27 +721,25 @@ pgarch_readyXlog(char *xlog)
 			strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen &&
 			strcmp(rlde->d_name + basenamelen, ".ready") == 0)
 		{
-			if (!found)
-			{
+			if (ready_count == 0)
 				strcpy(newxlog, rlde->d_name);
-				found = true;
-			}
 			else
 			{
 				if (strcmp(rlde->d_name, newxlog) < 0)
 					strcpy(newxlog, rlde->d_name);
 			}
+			ready_count++;
 		}
 	}
 	FreeDir(rldir);
 
-	if (found)
+	if (ready_count > 0)
 	{
 		/* truncate off the .ready */
 		newxlog[strlen(newxlog) - 6] = '\0';
 		strcpy(xlog, newxlog);
 	}
-	return found;
+	return ready_count;
 }
 
 /*
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index c7f41a5..2e9b276 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3088,10 +3088,12 @@ pgstat_send(void *msg, int len)
  * ----------
  */
 void
-pgstat_send_archiver(const char *xlog, bool failed)
+pgstat_send_archiver(const char *xlog, int64 ready_count, bool failed)
 {
 	PgStat_MsgArchiver msg;
 
+	Assert(ready_count >= 0);
+
 	/*
 	 * Prepare and send the message
 	 */
@@ -3099,6 +3101,7 @@ pgstat_send_archiver(const char *xlog, bool failed)
 	msg.m_failed = failed;
 	strncpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
 	msg.m_timestamp = GetCurrentTimestamp();
+	msg.m_ready_count = ready_count;
 	pgstat_send(&msg, sizeof(msg));
 }
 
@@ -5000,6 +5003,7 @@ pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len)
 			   sizeof(archiverStats.last_archived_wal));
 		archiverStats.last_archived_timestamp = msg->m_timestamp;
 	}
+	archiverStats.ready_count = msg->m_ready_count;
 }
 
 /* ----------
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index d621a68..37fd7d2 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -15,11 +15,13 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/xlog_internal.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "libpq/ip.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "storage/fd.h"
 #include "utils/builtins.h"
 #include "utils/inet.h"
 #include "utils/timestamp.h"
@@ -1737,8 +1739,8 @@ Datum
 pg_stat_get_archiver(PG_FUNCTION_ARGS)
 {
 	TupleDesc	tupdesc;
-	Datum		values[7];
-	bool		nulls[7];
+	Datum		values[8];
+	bool		nulls[8];
 	PgStat_ArchiverStats *archiver_stats;
 
 	/* Initialise values and NULL flags arrays */
@@ -1746,7 +1748,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	MemSet(nulls, 0, sizeof(nulls));
 
 	/* Initialise attributes information in the tuple descriptor */
-	tupdesc = CreateTemplateTupleDesc(7, false);
+	tupdesc = CreateTemplateTupleDesc(8, false);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count",
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal",
@@ -1759,7 +1761,9 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 					   TEXTOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time",
 					   TIMESTAMPTZOID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
+	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "ready_count",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 8, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
 	BlessTupleDesc(tupdesc);
@@ -1790,10 +1794,15 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	else
 		values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp);
 
-	if (archiver_stats->stat_reset_timestamp == 0)
+	if (archiver_stats->ready_count == 0)
 		nulls[6] = true;
 	else
-		values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp);
+		values[6] = Int64GetDatum(archiver_stats->ready_count);
+
+	if (archiver_stats->stat_reset_timestamp == 0)
+		nulls[7] = true;
+	else
+		values[7] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp);
 
 	/* Returns the record as Datum */
 	PG_RETURN_DATUM(HeapTupleGetDatum(
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 5d4e889..28e3b46 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2788,7 +2788,7 @@ DATA(insert OID = 2844 (  pg_stat_get_db_blk_read_time	PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: block read time, in msec");
 DATA(insert OID = 2845 (  pg_stat_get_db_blk_write_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 701 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_blk_write_time _null_ _null_ _null_ ));
 DESCR("statistics: block write time, in msec");
-DATA(insert OID = 3195 (  pg_stat_get_archiver		PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,1184}" "{o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ ));
+DATA(insert OID = 3195 (  pg_stat_get_archiver		PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,20,1184}" "{o,o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,ready_count,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ ));
 DESCR("statistics: information about WAL archiver");
 DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
 DESCR("statistics: number of timed checkpoints started by the bgwriter");
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 0892533..55d046d 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -377,6 +377,7 @@ typedef struct PgStat_MsgArchiver
 {
 	PgStat_MsgHdr m_hdr;
 	bool		m_failed;		/* Failed attempt */
+	uint64		m_ready_count;
 	char		m_xlog[MAX_XFN_CHARS + 1];
 	TimestampTz m_timestamp;
 } PgStat_MsgArchiver;
@@ -650,6 +651,7 @@ typedef struct PgStat_ArchiverStats
 	PgStat_Counter failed_count;	/* failed archival attempts */
 	char		last_failed_wal[MAX_XFN_CHARS + 1];		/* WAL file involved in
 														 * last failure */
+	int64		ready_count;			/* files ready to be archived */
 	TimestampTz last_failed_timestamp;	/* last archival failure time */
 	TimestampTz stat_reset_timestamp;
 } PgStat_ArchiverStats;
@@ -934,7 +936,8 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
 extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
-extern void pgstat_send_archiver(const char *xlog, bool failed);
+extern void pgstat_send_archiver(const char *xlog, int64 ready_count,
+						  bool failed);
 extern void pgstat_send_bgwriter(void);
 
 /* ----------
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index c79b45c..5eaf138 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1686,8 +1686,9 @@ pg_stat_archiver| SELECT s.archived_count,
     s.failed_count,
     s.last_failed_wal,
     s.last_failed_time,
+    s.ready_count,
     s.stats_reset
-   FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, stats_reset);
+   FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, ready_count, stats_reset);
 pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed,
     pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req,
     pg_stat_get_checkpoint_write_time() AS checkpoint_write_time,
-- 
2.1.3

