New statistics for WAL buffer dirty writes

Started by Satoshi Nagayasu, over 13 years ago, 35 messages
#1Satoshi Nagayasu
snaga@uptime.jp
1 attachment(s)

Hi all,

I've created a new patch to get/reset statistics of WAL buffer
writes (flushes) caused by the WAL buffer being full.

This patch provides two new functions, pg_stat_get_xlog_dirty_write()
and pg_stat_reset_xlog_dirty_write(), which have been designed to
determine an appropriate value for WAL buffer size.

If this counter is increasing in the production environment,
it would mean that the WAL buffer size is too small to hold
the xlog records generated by the transactions. So, you can increase
your WAL buffer size to keep xlog records and to reduce WAL writes.

I think this patch would not affect WAL write performance,
but I am still paying attention to it.

Any comments or suggestions?

Regards,

-----------------------------------------------------------
[snaga@devvm03 src]$ psql -p 15432 postgres
psql (9.3devel)
Type "help" for help.

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# \q
[snaga@devvm03 src]$ pgbench -p 15432 -s 10 -c 32 -t 1000 postgres
Scale option ignored, using pgbench_branches table count = 10
starting vacuum...end.
transaction type: TPC-B (sort of)
scaling factor: 10
query mode: simple
number of clients: 32
number of threads: 1
number of transactions per client: 1000
number of transactions actually processed: 32000/32000
tps = 141.937738 (including connections establishing)
tps = 142.123457 (excluding connections establishing)
[snaga@devvm03 src]$ psql -p 15432 postgres
psql (9.3devel)
Type "help" for help.

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# begin;
BEGIN
postgres=# DELETE FROM pgbench_accounts;
DELETE 1000000
postgres=# commit;
COMMIT
postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
19229
(1 row)

postgres=# SELECT pg_stat_reset_xlog_dirty_write();
pg_stat_reset_xlog_dirty_write
--------------------------------

(1 row)

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# \q
[snaga@devvm03 src]$
-----------------------------------------------------------

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite.diff (text/plain; charset=Shift_JIS; name=xlogdirtywrite.diff) [Download]
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 642c129..df1e6d4 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -280,6 +280,11 @@ static XLogRecPtr RedoRecPtr;
  */
 static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
 
+/*
+ * Counter for WAL dirty buffer writes.
+ */
+static uint64			WalBufferWriteDirtyCount = 0;
+
 /*----------
  * Shared-memory data structures for XLOG control
  *
@@ -1513,6 +1518,7 @@ AdvanceXLInsertBuffer(bool new_segment)
 				WriteRqst.Flush = 0;
 				XLogWrite(WriteRqst, false, false);
 				LWLockRelease(WALWriteLock);
+				WalBufferWriteDirtyCount++;
 				TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 			}
 		}
@@ -10492,3 +10498,15 @@ SetWalWriterSleeping(bool sleeping)
 	xlogctl->WalWriterSleeping = sleeping;
 	SpinLockRelease(&xlogctl->info_lck);
 }
+
+uint64
+xlog_dirty_write_counter_get()
+{
+	return WalBufferWriteDirtyCount;
+}
+
+void
+xlog_dirty_write_counter_reset()
+{
+	WalBufferWriteDirtyCount = 0;
+}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7c0705a..d544a5b 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -117,6 +117,9 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_write(PG_FUNCTION_ARGS);
+extern Datum pg_stat_reset_xlog_dirty_write(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1700,3 +1703,16 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_write(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(xlog_dirty_write_counter_get());
+}
+
+Datum
+pg_stat_reset_xlog_dirty_write(PG_FUNCTION_ARGS)
+{
+	xlog_dirty_write_counter_reset();
+	PG_RETURN_VOID();
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ec79870..01343b9 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -325,6 +325,9 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, char **
 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive);
 extern void do_pg_abort_backup(void);
 
+extern uint64 xlog_dirty_write_counter_get(void);
+extern void xlog_dirty_write_counter_reset(void);
+
 /* File path names (all relative to $PGDATA) */
 #define BACKUP_LABEL_FILE		"backup_label"
 #define BACKUP_LABEL_OLD		"backup_label.old"
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bee7154..e21f57e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2736,6 +2736,11 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_write  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_write _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+DATA(insert OID = 3767 (  pg_stat_reset_xlog_dirty_write  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 2278 "" _null_ _null_ _null_ _null_ pg_stat_reset_xlog_dirty_write _null_ _null_ _null_ ));
+DESCR("statistics: reset xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
#2Euler Taveira
euler@timbira.com
In reply to: Satoshi Nagayasu (#1)
Re: New statistics for WAL buffer dirty writes

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

--
Euler Taveira de Oliveira - Timbira http://www.timbira.com.br/
PostgreSQL: Consultoria, Desenvolvimento, Suporte 24x7 e Treinamento

#3Satoshi Nagayasu
snaga@uptime.jp
In reply to: Euler Taveira (#2)
Re: New statistics for WAL buffer dirty writes

2012/07/07 22:07, Euler Taveira wrote:

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

I agree that it would not tell the exact number for wal_buffers,
but it would help DBAs understand what's actually happening around WAL
buffers.

Also, decreasing the WALWriteLock contention is obviously important
for DBAs in terms of improving database performance.

Actually, that's the reason why I'm working on another statistic. :)
http://archives.postgresql.org/pgsql-hackers/2012-06/msg01489.php

Regards,
--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

#4Robert Haas
robertmhaas@gmail.com
In reply to: Euler Taveira (#2)
Re: New statistics for WAL buffer dirty writes

On Jul 7, 2012, at 9:07 AM, Euler Taveira <euler@timbira.com> wrote:

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

The number of WAL buffers that you are using is going to change so quickly as to be utterly meaningless. I don't really see that there's any statistic we could gather that would tell us how many WAL buffers are needed. This patch seems like it's on the right track, at least telling you how often you're running out.

I'm interested to run some benchmarks with this; I think it could be quite informative.

...Robert

#5Magnus Hagander
magnus@hagander.net
In reply to: Robert Haas (#4)
Re: New statistics for WAL buffer dirty writes

On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Jul 7, 2012, at 9:07 AM, Euler Taveira <euler@timbira.com> wrote:

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

The number of WAL buffers that you are using is going to change so quickly as to be utterly meaningless. I don't really see that there's any statistic we could gather that would tell us how many WAL buffers are needed. This patch seems like it's on the right track, at least telling you how often you're running out.

We could keep a high watermark of "what's the largest percentage we've
used", perhaps?

--
Magnus Hagander
Me: http://www.hagander.net/
Work: http://www.redpill-linpro.com/

#6Robert Haas
robertmhaas@gmail.com
In reply to: Magnus Hagander (#5)
Re: New statistics for WAL buffer dirty writes

On Jul 7, 2012, at 8:54 AM, Magnus Hagander <magnus@hagander.net> wrote:

On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Jul 7, 2012, at 9:07 AM, Euler Taveira <euler@timbira.com> wrote:

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

The number of WAL buffers that you are using is going to change so quickly as to be utterly meaningless. I don't really see that there's any statistic we could gather that would tell us how many WAL buffers are needed. This patch seems like it's on the right track, at least telling you how often you're running out.

We could keep a high watermark of "what's the largest percentage we've
used", perhaps?

Sure, but I doubt that would be as informative as this. It's no big deal if you hit 100% every once in a while; what you really want to know is whether it's happening once per second or once per week.

...Robert

#7Magnus Hagander
magnus@hagander.net
In reply to: Robert Haas (#6)
Re: New statistics for WAL buffer dirty writes

On Sat, Jul 7, 2012 at 7:06 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Jul 7, 2012, at 8:54 AM, Magnus Hagander <magnus@hagander.net> wrote:

On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Jul 7, 2012, at 9:07 AM, Euler Taveira <euler@timbira.com> wrote:

On 07-07-2012 09:00, Satoshi Nagayasu wrote:

I've created new patch to get/reset statistics of WAL buffer
writes (flushes) caused by WAL buffer full.

This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says "hey, I needed more buffers so I
write those dirty ones". It doesn't say how many. I would like to have
something that says "hey, you have 1000 buffers available and you are using
100 buffers (10%)". This new statistic is only useful for decreasing the
WALWriteLock contention.

The number of WAL buffers that you are using is going to change so quickly as to be utterly meaningless. I don't really see that there's any statistic we could gather that would tell us how many WAL buffers are needed. This patch seems like it's on the right track, at least telling you how often you're running out.

We could keep a high watermark of "what's the largest percentage we've
used", perhaps?

Sure, but I doubt that would be as informative as this. It's no big deal if you hit 100% every once in a while; what you really want to know is whether it's happening once per second or once per week.

I'm not suggesting one or the other, I'm suggesting that both values
might be interesting. Though in reality, you'd want that high
watermark to only count if it was the state for more than <n>, which
is a lot more difficult to get. so yeah, maybe that's overkill to even
try.

--
Magnus Hagander
Me: http://www.hagander.net/
Work: http://www.redpill-linpro.com/

#8Satoshi Nagayasu
snaga@uptime.jp
In reply to: Satoshi Nagayasu (#1)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

Hi,

Jeff Janes has pointed out that my previous patch could count
the dirty writes only within a single local backend, and
could not count them across the whole cluster, because the counter
was allocated in local process memory.

That's true, and I have fixed it by moving the counter into
shared memory, as a member of XLogCtlWrite, to keep the total
dirty writes in the cluster.

Regards,

2012/07/07 21:00, Satoshi Nagayasu wrote:

Hi all,

I've created a new patch to get/reset statistics of WAL buffer
writes (flushes) caused by the WAL buffer being full.

This patch provides two new functions, pg_stat_get_xlog_dirty_write()
and pg_stat_reset_xlog_dirty_write(), which have been designed to
determine an appropriate value for WAL buffer size.

If this counter is increasing in the production environment,
it would mean that the WAL buffer size is too small to hold
the xlog records generated by the transactions. So, you can increase
your WAL buffer size to keep xlog records and to reduce WAL writes.

I think this patch would not affect WAL write performance,
but I am still paying attention to it.

Any comments or suggestions?

Regards,

-----------------------------------------------------------
[snaga@devvm03 src]$ psql -p 15432 postgres
psql (9.3devel)
Type "help" for help.

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# \q
[snaga@devvm03 src]$ pgbench -p 15432 -s 10 -c 32 -t 1000 postgres
Scale option ignored, using pgbench_branches table count = 10
starting vacuum...end.
transaction type: TPC-B (sort of)
scaling factor: 10
query mode: simple
number of clients: 32
number of threads: 1
number of transactions per client: 1000
number of transactions actually processed: 32000/32000
tps = 141.937738 (including connections establishing)
tps = 142.123457 (excluding connections establishing)
[snaga@devvm03 src]$ psql -p 15432 postgres
psql (9.3devel)
Type "help" for help.

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# begin;
BEGIN
postgres=# DELETE FROM pgbench_accounts;
DELETE 1000000
postgres=# commit;
COMMIT
postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
19229
(1 row)

postgres=# SELECT pg_stat_reset_xlog_dirty_write();
pg_stat_reset_xlog_dirty_write
--------------------------------

(1 row)

postgres=# SELECT pg_stat_get_xlog_dirty_write();
pg_stat_get_xlog_dirty_write
------------------------------
0
(1 row)

postgres=# \q
[snaga@devvm03 src]$
-----------------------------------------------------------

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite2.diff (text/plain; charset=Shift_JIS; name=xlogdirtywrite2.diff) [Download]
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 642c129..893acf8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -370,6 +370,11 @@ typedef struct XLogCtlWrite
 {
 	int			curridx;		/* cache index of next block to write */
 	pg_time_t	lastSegSwitchTime;		/* time of last xlog segment switch */
+
+	/*
+	 * Counter for WAL dirty buffer writes.
+	 */
+	uint64		WalBufferWriteDirtyCount;
 } XLogCtlWrite;
 
 /*
@@ -1504,6 +1509,8 @@ AdvanceXLInsertBuffer(bool new_segment)
 			}
 			else
 			{
+				XLogCtlWrite *Write = &XLogCtl->Write;
+
 				/*
 				 * Have to write buffers while holding insert lock. This is
 				 * not good, so only write as much as we absolutely must.
@@ -1512,6 +1519,10 @@ AdvanceXLInsertBuffer(bool new_segment)
 				WriteRqst.Write = OldPageRqstPtr;
 				WriteRqst.Flush = 0;
 				XLogWrite(WriteRqst, false, false);
+				/*
+				 * XLogCtrlWrite must be protected with WALWriteLock.
+				 */
+				Write->WalBufferWriteDirtyCount++;
 				LWLockRelease(WALWriteLock);
 				TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 			}
@@ -10492,3 +10503,26 @@ SetWalWriterSleeping(bool sleeping)
 	xlogctl->WalWriterSleeping = sleeping;
 	SpinLockRelease(&xlogctl->info_lck);
 }
+
+uint64
+xlog_dirty_write_counter_get()
+{
+	XLogCtlWrite *Write = &XLogCtl->Write;
+	uint64 count;
+
+	LWLockAcquire(WALWriteLock, LW_SHARED);
+	count =	Write->WalBufferWriteDirtyCount;
+	LWLockRelease(WALWriteLock);
+
+	return count;
+}
+
+void
+xlog_dirty_write_counter_reset()
+{
+	XLogCtlWrite *Write = &XLogCtl->Write;
+
+	LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+	Write->WalBufferWriteDirtyCount = 0;
+	LWLockRelease(WALWriteLock);
+}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7c0705a..d544a5b 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -117,6 +117,9 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_write(PG_FUNCTION_ARGS);
+extern Datum pg_stat_reset_xlog_dirty_write(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1700,3 +1703,16 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_write(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(xlog_dirty_write_counter_get());
+}
+
+Datum
+pg_stat_reset_xlog_dirty_write(PG_FUNCTION_ARGS)
+{
+	xlog_dirty_write_counter_reset();
+	PG_RETURN_VOID();
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ec79870..01343b9 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -325,6 +325,9 @@ extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, char **
 extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive);
 extern void do_pg_abort_backup(void);
 
+extern uint64 xlog_dirty_write_counter_get(void);
+extern void xlog_dirty_write_counter_reset(void);
+
 /* File path names (all relative to $PGDATA) */
 #define BACKUP_LABEL_FILE		"backup_label"
 #define BACKUP_LABEL_OLD		"backup_label.old"
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bee7154..e21f57e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2736,6 +2736,11 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_write  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_write _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+DATA(insert OID = 3767 (  pg_stat_reset_xlog_dirty_write  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 2278 "" _null_ _null_ _null_ _null_ pg_stat_reset_xlog_dirty_write _null_ _null_ _null_ ));
+DESCR("statistics: reset xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
#9Jeff Janes
jeff.janes@gmail.com
In reply to: Satoshi Nagayasu (#8)
Re: New statistics for WAL buffer dirty writes

On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu <snaga@uptime.jp> wrote:

Hi,

Jeff Janes has pointed out that my previous patch could hold
a number of the dirty writes only in single local backend, and
it could not hold all over the cluster, because the counter
was allocated in the local process memory.

That's true, and I have fixed it with moving the counter into
the shared memory, as a member of XLogCtlWrite, to keep total
dirty writes in the cluster.

A concern I have is whether the XLogCtlWrite *Write pointer needs to
be declared volatile, to prevent the compiler from pushing operations
on them outside of the locks (and so memory barriers) that formally
protect them. However I see that existing code with Insert also does
not use volatile, so maybe my concern is baseless. Perhaps the
compiler guarantees to not move operations on pointers over the
boundaries of function calls? The pattern elsewhere in the code seems
to be to use volatiles for things protected by spin-locks (implemented
by macros) but not for things protected by LWLocks.

The comment "XLogCtrlWrite must be protected with WALWriteLock"
mis-spells XLogCtlWrite.

The final patch will need to add a section to the documentation.

Cheers,

Jeff

#10Simon Riggs
simon@2ndQuadrant.com
In reply to: Robert Haas (#6)
Re: New statistics for WAL buffer dirty writes

On 7 July 2012 18:06, Robert Haas <robertmhaas@gmail.com> wrote:

Sure, but I doubt that would be as informative as this. It's no big deal if you hit 100% every once in a while; what you really want to know is whether it's happening once per second or once per week.

Agreed.

I can't see an easy way of recording the high water mark % and I'm not
sure how we'd use it if we had it.

Let's just track how often we run out of space because that is when
bad things happen, not before.

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#11Robert Haas
robertmhaas@gmail.com
In reply to: Jeff Janes (#9)
Re: New statistics for WAL buffer dirty writes

On Sat, Jul 28, 2012 at 6:33 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

A concern I have is whether the XLogCtlWrite *Write pointer needs to
be declared volatile, to prevent the compiler from pushing operations
on them outside of the locks (and so memory barriers) that formally
protect them. However I see that existing code with Insert also does
not use volatile, so maybe my concern is baseless. Perhaps the
compiler guarantees to not move operations on pointers over the
boundaries of function calls? The pattern elsewhere in the code seems
to be to use volatiles for things protected by spin-locks (implemented
by macros) but not for things protected by LWLocks.

Yes, our code is only correct if we assume that the compiler performs
no global optimizations - i.e. no movement of code between functions.

IMHO, the way we have it now is kind of a mess. SpinLockAcquire and
SpinLockRelease are required to be CPU barriers, but they are not
required to be compiler barriers. If we changed that so that they
were required to act as barriers of both flavors, then (1) we wouldn't
need volatile in as many places, (2) we would be less prone to bugs
caused by the omission of not-obviously-necessary volatile markings,
and (3) we would remove one possible source of breakage that might be
induced by a globally optimizing compiler. As things stand today,
making a previously-global function static could result in working
code breaking, because the static function might be inlined where the
global function wasn't. Ouch.

Anyway, unless and until we make a definitional change of the sort
described above, any pointers used within a spinlock critical section
must be volatile; and pray that the compiler doesn't inline anything
you weren't expecting.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#12Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#11)
Re: New statistics for WAL buffer dirty writes

Robert Haas <robertmhaas@gmail.com> writes:

IMHO, the way we have it now is kind of a mess. SpinLockAcquire and
SpinLockRelease are required to be CPU barriers, but they are not
required to be compiler barriers. If we changed that so that they
were required to act as barriers of both flavors,

Since they are macros, how do you propose to do that exactly?

I agree that volatile-izing everything in the vicinity is a sucky
solution, but the last time we looked at this there did not seem to
be a better one.

regards, tom lane

#13Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#12)
Re: New statistics for WAL buffer dirty writes

On Tue, Jul 31, 2012 at 4:06 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Robert Haas <robertmhaas@gmail.com> writes:

IMHO, the way we have it now is kind of a mess. SpinLockAcquire and
SpinLockRelease are required to be CPU barriers, but they are not
required to be compiler barriers. If we changed that so that they
were required to act as barriers of both flavors,

Since they are macros, how do you propose to do that exactly?

Why does it matter that they are macros?

I agree that volatile-izing everything in the vicinity is a sucky
solution, but the last time we looked at this there did not seem to
be a better one.

Well, Linux has a barrier() primitive which is defined as a
compiler-barrier, so I don't see why we shouldn't be able to manage
the same thing. In fact, we've already got it, though it's presently
unused; see storage/barrier.h.

Looking over s_lock.h, it looks like TAS is typically defined using
__asm__ __volatile__, and the __asm__ is marked as clobbering memory.
As the fine comments say "this prevents gcc from thinking it can cache
the values of shared-memory fields across the asm code", which is
another way of saying that it's a compiler barrier. However, there's
no similar guard in S_UNLOCK, which is simply declared as a volatile
store, and therefore compiler ordering is guaranteed only with respect
to other volatile pointer references. If we added something of the
form __asm__ __volatile__("" : : : "memory") in there, it should
serve as a full compiler barrier. That might have to go in a static
inline function as we do with TAS, but I think it should work.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#14Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#13)
Re: New statistics for WAL buffer dirty writes

Robert Haas <robertmhaas@gmail.com> writes:

On Tue, Jul 31, 2012 at 4:06 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I agree that volatile-izing everything in the vicinity is a sucky
solution, but the last time we looked at this there did not seem to
be a better one.

Well, Linux has a barrier() primitive which is defined as a
compiler-barrier, so I don't see why we shouldn't be able to manage
the same thing. In fact, we've already got it, though it's presently
unused; see storage/barrier.h.

Solving the problem for linux only, or gcc only, isn't going to get us
to a place where we can stop volatile-izing call sites. We need to be
sure it works for every single case supported by s_lock.h.

I think you may be right that using __asm__ __volatile__ in gcc
S_UNLOCK cases would be a big step forward, but it needs more research
to see if that's the only fix needed.

regards, tom lane

#15Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#14)
Re: New statistics for WAL buffer dirty writes

On Wed, Aug 1, 2012 at 10:12 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Solving the problem for linux only, or gcc only, isn't going to get us
to a place where we can stop volatile-izing call sites. We need to be
sure it works for every single case supported by s_lock.h.

Yep, that's the problem all right.

I think you may be right that using __asm__ __volatile__ in gcc
S_UNLOCK cases would be a big step forward, but it needs more research
to see if that's the only fix needed.

I agree, but I will note that I have done a fair bit of research on
this already, and there are definitions in storage/barrier.h for
pg_compiler_barrier() that cover gcc, icc, HP's aCC, MSVC, and Borland
C. There are probably other wacky compilers out there, though:
looking at the build farm, I see Sun Studio and sco cc as cases that
would likely need some attention. Are there any compilers not
represented in the build-farm that we'd mind breaking?

If we can get working pg_compiler_barrier() definitions for all the
compilers we care about, the rest is probably mostly a question of
going through s_lock.h and inserting compiler barriers anywhere that
they aren't already implied by the existing code.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#16Jeff Janes
jeff.janes@gmail.com
In reply to: Jeff Janes (#9)
Re: New statistics for WAL buffer dirty writes

On Sat, Jul 28, 2012 at 3:33 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu <snaga@uptime.jp> wrote:

Hi,

Jeff Janes has pointed out that my previous patch could hold
a number of the dirty writes only in single local backend, and
it could not hold all over the cluster, because the counter
was allocated in the local process memory.

That's true, and I have fixed it with moving the counter into
the shared memory, as a member of XLogCtlWrite, to keep total
dirty writes in the cluster.

...

The comment "XLogCtrlWrite must be protected with WALWriteLock"
mis-spells XLogCtlWrite.

The final patch will need to add a sections to the documentation.

Thanks to Robert and Tom for addressing my concerns about the pointer
volatility.

I think there is enough consensus that this is useful without adding
more things to it, like histograms or high water marks.

However, I do think we will want to add a way to query for the time of
the last reset, as other monitoring features are going that way.

Is it OK that the count is reset upon a server restart?
pg_stat_bgwriter, for example, does not do that. Unfortunately I
think fixing this in an acceptable way will be harder than the entire
rest of the patch was.

The coding looks OK to me, it applies and builds, and passes make
check, and does what it says. I didn't do performance testing, as it
is hard to believe it would have a meaningful effect.

I've marked it as waiting on author, for the documentation and reset
time. I'd ask a more senior hacker to comment on the durability over
restarts.

Cheers,

Jeff

#17Robert Haas
robertmhaas@gmail.com
In reply to: Jeff Janes (#16)
Re: New statistics for WAL buffer dirty writes

On Sat, Aug 11, 2012 at 6:11 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

However, I do think we will want to add a way to query for the time of
the last reset, as other monitoring features are going that way.

That should be easy to add.

Is it OK that the count is reset upon a server restart?

I think it's OK. The reason why many of our stats are kept in the
stats file is because we have a limited amount of shared memory and
therefore can't guarantee (for example) that there's enough to keep
stats about EVERY table, since the number of tables is unlimited.
However, in cases where the data to be stored is fixed-size, and
especially when it's fixed-size and small, there's a lot of sense to
keeping the data in shared memory rather than sending stats collector
messages. It's a lot less overhead, for one thing. Maybe at some
point someone will want to devise a way to hibernate such stats to
disk at shutdown (or periodically) and reload them on startup, but it
doesn't seem like a must-have to me.

Other opinions may vary, of course.

I'll mark it as waiting on author, for the documentation and reset
time.

Yeah, we definitely need some documentation.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#18Satoshi Nagayasu
snaga@uptime.jp
In reply to: Jeff Janes (#16)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

Hi,

2012/08/12 7:11, Jeff Janes wrote:

On Sat, Jul 28, 2012 at 3:33 PM, Jeff Janes <jeff.janes@gmail.com> wrote:

On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu <snaga@uptime.jp> wrote:

Hi,

Jeff Janes has pointed out that my previous patch could hold
a number of the dirty writes only in single local backend, and
it could not hold all over the cluster, because the counter
was allocated in the local process memory.

That's true, and I have fixed it with moving the counter into
the shared memory, as a member of XLogCtlWrite, to keep total
dirty writes in the cluster.

...

The comment "XLogCtrlWrite must be protected with WALWriteLock"
mis-spells XLogCtlWrite.

The final patch will need to add a section to the documentation.

Thanks to Robert and Tom for addressing my concerns about the pointer
volatility.

I think there is enough consensus that this is useful without adding
more things to it, like histograms or high water marks.

However, I do think we will want to add a way to query for the time of
the last reset, as other monitoring features are going that way.

Is it OK that the count is reset upon a server restart?
pg_stat_bgwriter, for example, does not do that. Unfortunately I
think fixing this in an acceptable way will be harder than the entire
rest of the patch was.

The coding looks OK to me, it applies and builds, and passes make
check, and does what it says. I didn't do performance testing, as it
is hard to believe it would have a meaningful effect.

I'll mark it as waiting on author, for the documentation and reset
time. I'd ask a more senior hacker to comment on the durability over
restarts.

I have rewritten the patch to deal with dirty write statistics
through pgstat collector as bgwriter does.
Yeah, it's a bit bigger rewrite.

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

Of course, I will work on documentation next.

Regards,

Cheers,

Jeff

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite_v3.difftext/plain; charset=Shift_JIS; name=xlogdirtywrite_v3.diffDownload
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ff56c26..234d568 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1518,6 +1518,7 @@ AdvanceXLInsertBuffer(bool new_segment)
 				WriteRqst.Write = OldPageRqstPtr;
 				WriteRqst.Flush = 0;
 				XLogWrite(WriteRqst, false, false);
+				WalWriterStats.m_xlog_dirty_writes++;
 				LWLockRelease(WALWriteLock);
 				TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 			}
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8389d5c..f031be1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -125,6 +125,15 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -279,6 +288,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -1188,6 +1198,8 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -2988,6 +3000,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send the locally accumulated WalWriterStats to the collector
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* Static storage is zero-filled; type must match WalWriterStats */
+	static const PgStat_MsgWalWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(WalWriterStats)) == 0)
+		return;
+
+	/*
+	 * Prepare and send the message
+	 */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear the whole buffer (header included) so the check above works again.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3207,6 +3251,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -4382,6 +4430,12 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 		memset(&globalStats, 0, sizeof(globalStats));
 		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
 	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		memset(&globalStats, 0, sizeof(globalStats));
+		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	}
 
 	/*
 	 * Presumably the sender of this message validated the target, don't
@@ -4534,6 +4588,18 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 }
 
 /* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
+}
+
+/* ----------
  * pgstat_recv_recoveryconflict() -
  *
  *	Process a RECOVERYCONFLICT message.
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 4313901..6fbc5f9 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -289,6 +289,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index f1248a8..1ed2c0a 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3918,6 +3918,8 @@ PostgresMain(int argc, char *argv[], const char *username)
 				pgstat_report_activity(STATE_IDLE, NULL);
 			}
 
+			pgstat_send_walwriter();
+
 			ReadyForQuery(whereToSendOutput);
 			send_ready_for_query = false;
 		}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7d4059f..f707c9a 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -118,6 +118,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1701,3 +1703,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 77a3b41..da2225c 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2740,6 +2740,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 613c1c2..59aa1ba 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -372,6 +374,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -499,6 +512,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -622,6 +636,7 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_written_backend;
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
+	PgStat_Counter xlog_dirty_writes;
 	TimestampTz stat_reset_timestamp;
 } PgStat_GlobalStats;
 
@@ -730,6 +745,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -858,6 +875,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
#19Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Satoshi Nagayasu (#18)
Re: New statistics for WAL buffer dirty writes

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

#20Satoshi Nagayasu
snaga@uptime.jp
In reply to: Alvaro Herrera (#19)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

Regards,
--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite_v4.difftext/plain; charset=Shift_JIS; name=xlogdirtywrite_v4.diffDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b4fcbaf..0ae885b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1907,6 +1907,13 @@ include 'filename'
         results in most cases.
        </para>
 
+       <para>
+        If pg_stat_walwriter.dirty_writes, the number of WAL buffer
+        flushes forced by a full buffer, is continuously increasing
+        on your running server, you may need to enlarge this buffer
+        size.
+       </para>
+
       </listitem>
      </varlistentry>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 39ccfbb..3117f91 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
      </row>
 
      <row>
+      <entry><structname>pg_stat_walwriter</><indexterm><primary>pg_stat_walwriter</primary></indexterm></entry>
+      <entry>One row only, showing statistics about the wal writer
+       process's activity. See <xref linkend="pg-stat-walwriter-view">
+       for details.
+     </entry>
+     </row>
+
+     <row>
       <entry><structname>pg_stat_database</><indexterm><primary>pg_stat_database</primary></indexterm></entry>
       <entry>One row per database, showing database-wide statistics. See
        <xref linkend="pg-stat-database-view"> for details.
@@ -735,6 +743,39 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    single row, containing global data for the cluster.
   </para>
 
+  <table id="pg-stat-walwriter-view" xreflabel="pg_stat_walwriter">
+   <title><structname>pg_stat_walwriter</structname> View</title>
+
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>
+     </row>
+     <row>
+      <entry><structfield>stats_reset</></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>Time at which these statistics were last reset</entry>
+     </row>
+    </tbody>
+    </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_walwriter</structname> view will always have a
+   single row, containing global data for the cluster.
+  </para>
+
   <table id="pg-stat-database-view" xreflabel="pg_stat_database">
    <title><structname>pg_stat_database</structname> View</title>
    <tgroup cols="3">
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index d251d08..631a0af 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1347,6 +1347,7 @@ AdvanceXLInsertBuffer(bool new_segment)
 				WriteRqst.Write = OldPageRqstPtr;
 				WriteRqst.Flush = 0;
 				XLogWrite(WriteRqst, false, false);
+				WalWriterStats.m_xlog_dirty_writes++;
 				LWLockRelease(WALWriteLock);
 				TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 			}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 607a72f..40f0c34 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -671,6 +671,11 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+    SELECT
+        pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+        pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index be3adf1..5be78c6 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -125,6 +125,15 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -279,6 +288,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -762,6 +772,9 @@ pgstat_report_stat(bool force)
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
+
+	/* Now, send wal buffer flush statistics */
+	pgstat_send_walwriter();
 }
 
 /*
@@ -1188,11 +1201,13 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"bgwriter\".")));
+				 errhint("Target must be \"bgwriter\" or \"walwriter\".")));
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
@@ -2988,6 +3003,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send the locally accumulated WalWriterStats to the collector
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* Static storage is zero-filled; type must match WalWriterStats */
+	static const PgStat_MsgWalWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(WalWriterStats)) == 0)
+		return;
+
+	/*
+	 * Prepare and send the message
+	 */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear the whole buffer (header included) so the check above works again.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3209,6 +3256,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -3638,7 +3689,8 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
 	 * Set the current timestamp (will be kept only in case we can't load an
 	 * existing statsfile).
 	 */
-	globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.stat_bgw_reset_timestamp = GetCurrentTimestamp();
+	globalStats.stat_wal_reset_timestamp = GetCurrentTimestamp();
 
 	/*
 	 * Try to open the status file. If it doesn't exist, the backends simply
@@ -4381,8 +4433,23 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
 		/* Reset the global background writer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
-		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+		globalStats.timed_checkpoints       = 0;
+		globalStats.requested_checkpoints   = 0;
+		globalStats.checkpoint_write_time   = 0;
+		globalStats.checkpoint_sync_time    = 0;
+		globalStats.buf_written_checkpoints = 0;
+		globalStats.buf_written_clean       = 0;
+		globalStats.maxwritten_clean        = 0;
+		globalStats.buf_written_backend     = 0;
+		globalStats.buf_fsync_backend       = 0;
+		globalStats.buf_alloc               = 0;
+		globalStats.stat_bgw_reset_timestamp = GetCurrentTimestamp();
+	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		globalStats.xlog_dirty_writes = 0;
+		globalStats.stat_wal_reset_timestamp = GetCurrentTimestamp();
 	}
 
 	/*
@@ -4536,6 +4603,18 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 }
 
 /* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
+}
+
+/* ----------
  * pgstat_recv_recoveryconflict() -
  *
  *	Process a RECOVERYCONFLICT message.
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index c3e15ef..cd294fb 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -290,6 +290,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7d4059f..fc25dda 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -97,6 +97,7 @@ extern Datum pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_numscans(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS);
@@ -118,6 +119,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1443,7 +1446,7 @@ pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_bgw_reset_timestamp);
 }
 
 Datum
@@ -1465,6 +1468,12 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 }
 
 Datum
+pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_wal_reset_timestamp);
+}
+
+Datum
 pg_stat_get_xact_numscans(PG_FUNCTION_ARGS)
 {
 	Oid			relid = PG_GETARG_OID(0);
@@ -1701,3 +1710,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f935eb1..d6edb5a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2702,6 +2702,8 @@ DATA(insert OID = 3063 ( pg_stat_get_buf_fsync_backend PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: number of backend buffer writes that did their own fsync");
 DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_buf_alloc _null_ _null_ _null_ ));
 DESCR("statistics: number of buffer allocations");
+DATA(insert OID = 2860 ( pg_stat_get_wal_stat_reset_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 1184 "" _null_ _null_ _null_ _null_	pg_stat_get_wal_stat_reset_time _null_ _null_ _null_ ));
+DESCR("statistics: last reset for the wal");
 
 DATA(insert OID = 2978 (  pg_stat_get_function_calls		PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls");
@@ -2746,6 +2748,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 613c1c2..f71c538 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -372,6 +374,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -499,6 +512,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -622,7 +636,10 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_written_backend;
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
-	TimestampTz stat_reset_timestamp;
+	TimestampTz stat_bgw_reset_timestamp;
+
+	PgStat_Counter xlog_dirty_writes;
+	TimestampTz stat_wal_reset_timestamp;
 } PgStat_GlobalStats;
 
 
@@ -730,6 +747,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -858,6 +877,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
#21Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Satoshi Nagayasu (#20)
Re: New statistics for WAL buffer dirty writes

Satoshi Nagayasu escribió:

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

Thanks. I gave this a look and I have a couple of comments:

1. The counter is called dirty_write. I imagine that this comes
directly from the name of the nearby DTrace probe,
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START. That probe comes from
email 494C1565.3060105@sun.com committed in 4ee79fd20d9a. But there
wasn't much discussion about the name back then. Maybe that was fine at
the time because it was pretty much an internal matter, being so deep in
the code. But we're now going to expose it to regular users, so we'd
better choose a very good name because we're going to be stuck with it
for a very long time. And the name "dirty" doesn't ring with me too
well; what matters here is not that we're writing a buffer that is
dirty, but the fact that we're writing while holding the WalInsertLock,
so the name should convey the fact that this is a "locked" write or
something like that. Or at least that's how I see the issue. Note the
documentation wording:

+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>

2. Should we put bgwriter and walwriter data in separate structs? I
don't think this is necessary, but maybe someone opines differently?

3. 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;

Maybe we should use a different name for the struct, to avoid having to
excuse ourselves for the name being wrong ...

--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#22Tomas Vondra
tv@fuzzy.cz
In reply to: Satoshi Nagayasu (#20)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

I've done a quick review of the v4 patch:

1) applies fine on HEAD, compiles fine

2) "make installcheck" fails because of a difference in the 'rules'
test suite (there's a new view "pg_stat_walwriter" - see the
attached patch for a fixed version or expected/rules.out)

3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).

4) Are there any other fields that might be interesting? Right now
there's just "dirty_writes" but I guess there are other values. E.g.
how much data was actually written etc.?

Tomas

Attachments:

xlogdirtywrite_v5.difftext/plain; charset=UTF-8; name=xlogdirtywrite_v5.diffDownload
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index e376452..334ce4c 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -135,6 +135,11 @@ int			unlogged_tables = 0;
 double		sample_rate = 0.0;
 
 /*
+ * logging steps (seconds between log messages)
+ */
+int			log_step_seconds = 5;
+
+/*
  * tablespace selection
  */
 char	   *tablespace = NULL;
@@ -1362,6 +1367,11 @@ init(bool is_no_vacuum)
 	char		sql[256];
 	int			i;
 
+	/* used to track elapsed time and estimate of the remaining time */
+	instr_time	start, diff;
+	double		elapsed_sec, remaining_sec;
+	int			log_interval = 1;
+
 	if ((con = doConnect()) == NULL)
 		exit(1);
 
@@ -1430,6 +1440,8 @@ init(bool is_no_vacuum)
 	}
 	PQclear(res);
 
+	INSTR_TIME_SET_CURRENT(start);
+
 	for (i = 0; i < naccounts * scale; i++)
 	{
 		int			j = i + 1;
@@ -1441,10 +1453,27 @@ init(bool is_no_vacuum)
 			exit(1);
 		}
 
-		if (j % 100000 == 0)
-			fprintf(stderr, "%d of %d tuples (%d%%) done.\n",
-					j, naccounts * scale,
-					(int) (((int64) j * 100) / (naccounts * scale)));
+		/* let's not call the timing for each row, but only each 100 rows */
+		if (j % 100 == 0 || j == scale * naccounts)
+		{
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+			remaining_sec = (scale * naccounts - j) * elapsed_sec / j;
+
+			/* have we reached the next interval? */
+			if (elapsed_sec >= log_interval * log_step_seconds) {
+
+				fprintf(stderr, "%d of %d tuples (%d%%) done (elapsed %.2f s, remaining %.2f s).\n",
+						j, naccounts * scale,
+						(int) (((int64) j * 100) / (naccounts * scale)), elapsed_sec, remaining_sec);
+
+				/* skip to the next interval */
+				log_interval = (int)ceil(elapsed_sec/log_step_seconds);
+			}
+		}
+
 	}
 	if (PQputline(con, "\\.\n"))
 	{
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b7df8ce..bf9acc5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1919,6 +1919,13 @@ include 'filename'
         results in most cases.
        </para>
 
+       <para>
+        When you see pg_stat_walwriter.dirty_write, which means number
+        of buffer flushing at buffer full, is continuously increasing
+        in your running server, you may need to enlarge this buffer
+        size.
+       </para>
+
       </listitem>
      </varlistentry>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 39ccfbb..3117f91 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
      </row>
 
      <row>
+      <entry><structname>pg_stat_walwriter</><indexterm><primary>pg_stat_walwriter</primary></indexterm></entry>
+      <entry>One row only, showing statistics about the wal writer
+       process's activity. See <xref linkend="pg-stat-walwriter-view">
+       for details.
+     </entry>
+     </row>
+
+     <row>
       <entry><structname>pg_stat_database</><indexterm><primary>pg_stat_database</primary></indexterm></entry>
       <entry>One row per database, showing database-wide statistics. See
        <xref linkend="pg-stat-database-view"> for details.
@@ -735,6 +743,39 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    single row, containing global data for the cluster.
   </para>
 
+  <table id="pg-stat-walwriter-view" xreflabel="pg_stat_walwriter">
+   <title><structname>pg_stat_walwriter</structname> View</title>
+
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>
+     </row>
+     <row>
+      <entry><structfield>stats_reset</></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>Time at which these statistics were last reset</entry>
+     </row>
+    </tbody>
+    </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_walwriter</structname> view will always have a
+   single row, containing global data for the cluster.
+  </para>
+
   <table id="pg-stat-database-view" xreflabel="pg_stat_database">
    <title><structname>pg_stat_database</structname> View</title>
    <tgroup cols="3">
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2618c8d..0bf92fa 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1338,6 +1338,7 @@ AdvanceXLInsertBuffer(bool new_segment)
 				WriteRqst.Write = OldPageRqstPtr;
 				WriteRqst.Flush = 0;
 				XLogWrite(WriteRqst, false, false);
+				WalWriterStats.m_xlog_dirty_writes++;
 				LWLockRelease(WALWriteLock);
 				TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 			}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 607a72f..40f0c34 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -671,6 +671,11 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+    SELECT
+        pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+        pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index be3adf1..5be78c6 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -125,6 +125,15 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -279,6 +288,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -762,6 +772,9 @@ pgstat_report_stat(bool force)
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
+
+	/* Now, send wal buffer flush statistics */
+	pgstat_send_walwriter();
 }
 
 /*
@@ -1188,11 +1201,13 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"bgwriter\".")));
+				 errhint("Target must be \"bgwriter\" or \"walwriter\".")));
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
@@ -2988,6 +3003,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send walwriter statistics to the collector
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* We assume this initializes to zeroes */
+	static const PgStat_MsgBgWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(PgStat_MsgWalWriter)) == 0)
+		return;
+
+	/*
+	 * Prepare and send the message
+	 */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3209,6 +3256,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -3638,7 +3689,8 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
 	 * Set the current timestamp (will be kept only in case we can't load an
 	 * existing statsfile).
 	 */
-	globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.stat_bgw_reset_timestamp = GetCurrentTimestamp();
+	globalStats.stat_wal_reset_timestamp = GetCurrentTimestamp();
 
 	/*
 	 * Try to open the status file. If it doesn't exist, the backends simply
@@ -4381,8 +4433,23 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
 		/* Reset the global background writer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
-		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+		globalStats.timed_checkpoints       = 0;
+		globalStats.requested_checkpoints   = 0;
+		globalStats.checkpoint_write_time   = 0;
+		globalStats.checkpoint_sync_time    = 0;
+		globalStats.buf_written_checkpoints = 0;
+		globalStats.buf_written_clean       = 0;
+		globalStats.maxwritten_clean        = 0;
+		globalStats.buf_written_backend     = 0;
+		globalStats.buf_fsync_backend       = 0;
+		globalStats.buf_alloc               = 0;
+		globalStats.stat_bgw_reset_timestamp = GetCurrentTimestamp();
+	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		globalStats.xlog_dirty_writes = 0;
+		globalStats.stat_wal_reset_timestamp = GetCurrentTimestamp();
 	}
 
 	/*
@@ -4536,6 +4603,18 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 }
 
 /* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
+}
+
+/* ----------
  * pgstat_recv_recoveryconflict() -
  *
  *	Process a RECOVERYCONFLICT message.
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index c3e15ef..cd294fb 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -290,6 +290,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7d4059f..fc25dda 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -97,6 +97,7 @@ extern Datum pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_numscans(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS);
@@ -118,6 +119,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1443,7 +1446,7 @@ pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_bgw_reset_timestamp);
 }
 
 Datum
@@ -1465,6 +1468,12 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 }
 
 Datum
+pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_wal_reset_timestamp);
+}
+
+Datum
 pg_stat_get_xact_numscans(PG_FUNCTION_ARGS)
 {
 	Oid			relid = PG_GETARG_OID(0);
@@ -1701,3 +1710,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f935eb1..d6edb5a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2702,6 +2702,8 @@ DATA(insert OID = 3063 ( pg_stat_get_buf_fsync_backend PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: number of backend buffer writes that did their own fsync");
 DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_buf_alloc _null_ _null_ _null_ ));
 DESCR("statistics: number of buffer allocations");
+DATA(insert OID = 2860 ( pg_stat_get_wal_stat_reset_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 1184 "" _null_ _null_ _null_ _null_	pg_stat_get_wal_stat_reset_time _null_ _null_ _null_ ));
+DESCR("statistics: last reset for the wal");
 
 DATA(insert OID = 2978 (  pg_stat_get_function_calls		PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls");
@@ -2746,6 +2748,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 613c1c2..f71c538 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -372,6 +374,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -499,6 +512,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -622,7 +636,10 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_written_backend;
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
-	TimestampTz stat_reset_timestamp;
+	TimestampTz stat_bgw_reset_timestamp;
+
+	PgStat_Counter xlog_dirty_writes;
+	TimestampTz stat_wal_reset_timestamp;
 } PgStat_GlobalStats;
 
 
@@ -730,6 +747,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -858,6 +877,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a235571..1612cc0 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1305,6 +1305,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  pg_stat_user_functions          | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_function_calls(p.oid) AS calls, pg_stat_get_function_total_time(p.oid) AS total_time, pg_stat_get_function_self_time(p.oid) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_function_calls(p.oid) IS NOT NULL));
  pg_stat_user_indexes            | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_indexes.schemaname !~ '^pg_toast'::text));
  pg_stat_user_tables             | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.n_tup_hot_upd, pg_stat_all_tables.n_live_tup, pg_stat_all_tables.n_dead_tup, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze, pg_stat_all_tables.vacuum_count, pg_stat_all_tables.autovacuum_count, pg_stat_all_tables.analyze_count, pg_stat_all_tables.autoanalyze_count FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
+ pg_stat_walwriter               | SELECT pg_stat_get_xlog_dirty_writes() AS dirty_writes, pg_stat_get_wal_stat_reset_time() AS stats_reset;
  pg_stat_xact_all_tables         | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_xact_numscans(c.oid) AS seq_scan, pg_stat_get_xact_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_xact_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_xact_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_xact_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_xact_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_xact_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_xact_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_xact_tuples_hot_updated(c.oid) AS n_tup_hot_upd FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
  pg_stat_xact_sys_tables         | SELECT pg_stat_xact_all_tables.relid, pg_stat_xact_all_tables.schemaname, pg_stat_xact_all_tables.relname, pg_stat_xact_all_tables.seq_scan, pg_stat_xact_all_tables.seq_tup_read, pg_stat_xact_all_tables.idx_scan, pg_stat_xact_all_tables.idx_tup_fetch, pg_stat_xact_all_tables.n_tup_ins, pg_stat_xact_all_tables.n_tup_upd, pg_stat_xact_all_tables.n_tup_del, pg_stat_xact_all_tables.n_tup_hot_upd FROM pg_stat_xact_all_tables WHERE ((pg_stat_xact_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_xact_all_tables.schemaname ~ '^pg_toast'::text));
  pg_stat_xact_user_functions     | SELECT p.oid AS funcid, n.nspname AS schemaname, p.proname AS funcname, pg_stat_get_xact_function_calls(p.oid) AS calls, pg_stat_get_xact_function_total_time(p.oid) AS total_time, pg_stat_get_xact_function_self_time(p.oid) AS self_time FROM (pg_proc p LEFT JOIN pg_namespace n ON ((n.oid = p.pronamespace))) WHERE ((p.prolang <> (12)::oid) AND (pg_stat_get_xact_function_calls(p.oid) IS NOT NULL));
@@ -1339,7 +1340,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  shoelace_obsolete               | SELECT shoelace.sl_name, shoelace.sl_avail, shoelace.sl_color, shoelace.sl_len, shoelace.sl_unit, shoelace.sl_len_cm FROM shoelace WHERE (NOT (EXISTS (SELECT shoe.shoename FROM shoe WHERE (shoe.slcolor = shoelace.sl_color))));
  street                          | SELECT r.name, r.thepath, c.cname FROM ONLY road r, real_city c WHERE (c.outline ## r.thepath);
  toyemp                          | SELECT emp.name, emp.age, emp.location, (12 * emp.salary) AS annualsal FROM emp;
-(60 rows)
+(61 rows)
 
 SELECT tablename, rulename, definition FROM pg_rules
 	ORDER BY tablename, rulename;
#23Satoshi Nagayasu
snaga@uptime.jp
In reply to: Alvaro Herrera (#21)
Re: New statistics for WAL buffer dirty writes

(2012/11/27 7:42), Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

Thanks. I gave this a look and I have a couple of comments:

1. The counter is called dirty_write. I imagine that this comes
directly from the name of the nearby DTrace probe,
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START. That probe comes from
email 494C1565.3060105@sun.com committed in 4ee79fd20d9a. But there
wasn't much discussion about the name back then. Maybe that was fine at
the time because it was pretty much an internal matter, being so deep in
the code. But we're now going to expose it to regular users, so we'd
better choose a very good name because we're going to be stuck with it
for a very long time. And the name "dirty" doesn't ring with me too
well; what matters here is not that we're writing a buffer that is
dirty, but the fact that we're writing while holding the WalInsertLock,
so the name should convey the fact that this is a "locked" write or
something like that. Or at least that's how I see the issue. Note the
documentation wording:

+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>

Yes, "dirty_writes" came from the probe name, and if it needs to be
changed, "buffers_flush" would make sense for me in this situation,
because this counter is intended to show WAL writes due to "wal buffer
full".

2. Should we put bgwriter and walwriter data in separate structs? I
don't think this is necessary, but maybe someone opines differently?

I tried to minimize an impact of this patch, but if I can change this
struct, yes, I'd like to split into two structs.

3.
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;

Maybe we should use a different name for the struct, to avoid having to
excuse ourselves for the name being wrong ...

Ok. How about WalBufferStats? I think this name could be accepted in
both the wal writer and each backend process.

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#24Satoshi Nagayasu
snaga@uptime.jp
In reply to: Tomas Vondra (#22)
Re: New statistics for WAL buffer dirty writes

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

I've done a quick review of the v4 patch:

Thanks for the review, and sorry for my delayed response.

1) applies fine on HEAD, compiles fine

2) "make installcheck" fails because of a difference in the 'rules'
test suite (there's a new view "pg_stat_walwriter" - see the
attached patch for a fixed version or expected/rules.out)

Ah, I forgot about the regression test. I will fix it. Thanks.

3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).

Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to hold
those two structs in the stat collector.

4) Are there any other fields that might be interesting? Right now
there's just "dirty_writes" but I guess there are other values. E.g.
how much data was actually written etc.?

AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,

Tomas

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#25Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Satoshi Nagayasu (#24)
Re: New statistics for WAL buffer dirty writes

What happened to this patch? We were waiting on an updated version from
you.

Satoshi Nagayasu wrote:

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset time for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

I've done a quick review of the v4 patch:

Thanks for the review, and sorry for my delayed response.

1) applies fine on HEAD, compiles fine

2) "make installcheck" fails because of a difference in the 'rules'
test suite (there's a new view "pg_stat_walwriter" - see the
attached patch for a fixed version or expected/rules.out)

Ah, I forgot about the regression test. I will fix it. Thanks.

3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).

Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
hold those two structs in the stat collector.

4) Are there any other fields that might be interesting? Right now
there's just "dirty_writes" but I guess there are other values. E.g.
how much data was actually written etc.?

AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,

--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#26Satoshi Nagayasu
snaga@uptime.jp
In reply to: Alvaro Herrera (#25)
Re: New statistics for WAL buffer dirty writes

Will revise and re-resubmit for the next CF.

Regards,

2013/07/19 1:06, Alvaro Herrera wrote:

What happened to this patch? We were waiting on an updated version from
you.

Satoshi Nagayasu wrote:

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't seem to
make sense, to me, to have a single reset time for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

I've done a quick review of the v4 patch:

Thanks for the review, and sorry for my delayed response.

1) applies fine on HEAD, compiles fine

2) "make installcheck" fails because of a difference in the 'rules'
test suite (there's a new view "pg_stat_walwriter" - see the
attached patch for a fixed version or expected/rules.out)

Ah, I forgot about the regression test. I will fix it. Thanks.

3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).

Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
hold those two structs in the stat collector.

4) Are there any other fields that might be interesting? Right now
there's just "dirty_writes" but I guess there are other values. E.g.
how much data was actually written etc.?

AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#27Satoshi Nagayasu
snaga@uptime.jp
In reply to: Satoshi Nagayasu (#26)
2 attachment(s)
Re: New statistics for WAL buffer dirty writes

Hi,

The revised patch for wal buffer statistics is attached.
A test script is also attached. Please take a look.

Regards,

(2013/07/19 7:49), Satoshi Nagayasu wrote:

Will revise and re-resubmit for the next CF.

Regards,

2013/07/19 1:06, Alvaro Herrera wrote:

What happened to this patch? We were waiting on an updated version from
you.

Satoshi Nagayasu wrote:

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and could be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in
globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this? Should we split this value?
And should we have new system view for walwriter?

I think the answer to the two last questions is yes. It doesn't
seem to
make sense, to me, to have a single reset time for what are
effectively two separate things.

Please submit an updated patch to next CF. I'm marking this one
returned with feedback. Thanks.

I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

I've done a quick review of the v4 patch:

Thanks for the review, and sorry for my delayed response.

1) applies fine on HEAD, compiles fine

2) "make installcheck" fails because of a difference in the 'rules'
test suite (there's a new view "pg_stat_walwriter" - see the
attached patch for a fixed version or expected/rules.out)

Ah, I forgot about the regression test. I will fix it. Thanks.

3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For
example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).

Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
hold those two structs in the stat collector.

4) Are there any other fields that might be interesting? Right now
there's just "dirty_writes" but I guess there are other values.
E.g.
how much data was actually written etc.?

AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite_v5.diff (text/plain; charset=Shift_JIS; name=xlogdirtywrite_v5.diff) [Download]
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
         results in most cases.
        </para>
 
+       <para>
+        When you see pg_stat_walwriter.dirty_write, which means number
+        of buffer flushing at buffer full, is continuously increasing
+        in your running server, you may need to enlarge this buffer
+        size.
+       </para>
+
       </listitem>
      </varlistentry>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
      </row>
 
      <row>
+      <entry><structname>pg_stat_walwriter</><indexterm><primary>pg_stat_walwriter</primary></indexterm></entry>
+      <entry>One row only, showing statistics about the wal writer
+       process's activity. See <xref linkend="pg-stat-walwriter-view">
+       for details.
+     </entry>
+     </row>
+
+     <row>
       <entry><structname>pg_stat_database</><indexterm><primary>pg_stat_database</primary></indexterm></entry>
       <entry>One row per database, showing database-wide statistics. See
        <xref linkend="pg-stat-database-view"> for details.
@@ -735,6 +743,39 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    single row, containing global data for the cluster.
   </para>
 
+  <table id="pg-stat-walwriter-view" xreflabel="pg_stat_walwriter">
+   <title><structname>pg_stat_walwriter</structname> View</title>
+
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>
+     </row>
+     <row>
+      <entry><structfield>stats_reset</></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>Time at which these statistics were last reset</entry>
+     </row>
+    </tbody>
+    </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_walwriter</structname> view will always have a
+   single row, containing global data for the cluster.
+  </para>
+
   <table id="pg-stat-database-view" xreflabel="pg_stat_database">
    <title><structname>pg_stat_database</structname> View</title>
    <tgroup cols="3">
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index dc47c47..d0e85c9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2467,6 +2467,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
 					WriteRqst.Write = OldPageRqstPtr;
 					WriteRqst.Flush = 0;
 					XLogWrite(WriteRqst, false);
+					WalWriterStats.m_xlog_dirty_writes++;
 					LWLockRelease(WALWriteLock);
 					TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 				}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 575a40f..12a2ed0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -686,6 +686,11 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+    SELECT
+        pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+        pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b5ce2f6..8c56af5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -129,6 +129,14 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter statistics counter.
+ * This counter is incremented by each XLogWrite call,
+ * both in the wal writer process and each backend.
+ * And then, sent to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -293,6 +301,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -820,6 +829,9 @@ pgstat_report_stat(bool force)
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
+
+	/* Now, send wal buffer flush statistics */
+	pgstat_send_walwriter();
 }
 
 /*
@@ -1249,11 +1261,13 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"bgwriter\".")));
+				 errhint("Target must be \"bgwriter\" or \"walwriter\".")));
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
@@ -3055,6 +3069,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send walwriter statistics to the collector
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* We assume this initializes to zeroes */
+	static const PgStat_MsgBgWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(PgStat_MsgWalWriter)) == 0)
+		return;
+
+	/*
+	 * Prepare and send the message
+	 */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3270,6 +3316,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -3825,7 +3875,8 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
 	 * Set the current timestamp (will be kept only in case we can't load an
 	 * existing statsfile).
 	 */
-	globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 
 	/*
 	 * Try to open the stats file. If it doesn't exist, the backends simply
@@ -4723,8 +4774,23 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
 		/* Reset the global background writer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
-		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+		globalStats.bgWriterGlobalStats.timed_checkpoints       = 0;
+		globalStats.bgWriterGlobalStats.requested_checkpoints   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_write_time   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_sync_time    = 0;
+		globalStats.bgWriterGlobalStats.buf_written_checkpoints = 0;
+		globalStats.bgWriterGlobalStats.buf_written_clean       = 0;
+		globalStats.bgWriterGlobalStats.maxwritten_clean        = 0;
+		globalStats.bgWriterGlobalStats.buf_written_backend     = 0;
+		globalStats.bgWriterGlobalStats.buf_fsync_backend       = 0;
+		globalStats.bgWriterGlobalStats.buf_alloc               = 0;
+		globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		globalStats.walWriterGlobalStats.xlog_dirty_writes = 0;
+		globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 	}
 
 	/*
@@ -4865,16 +4931,28 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
 static void
 pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 {
-	globalStats.timed_checkpoints += msg->m_timed_checkpoints;
-	globalStats.requested_checkpoints += msg->m_requested_checkpoints;
-	globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
-	globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
-	globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
-	globalStats.buf_written_clean += msg->m_buf_written_clean;
-	globalStats.maxwritten_clean += msg->m_maxwritten_clean;
-	globalStats.buf_written_backend += msg->m_buf_written_backend;
-	globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
-	globalStats.buf_alloc += msg->m_buf_alloc;
+	globalStats.bgWriterGlobalStats.timed_checkpoints += msg->m_timed_checkpoints;
+	globalStats.bgWriterGlobalStats.requested_checkpoints += msg->m_requested_checkpoints;
+	globalStats.bgWriterGlobalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
+	globalStats.bgWriterGlobalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
+	globalStats.bgWriterGlobalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
+	globalStats.bgWriterGlobalStats.buf_written_clean += msg->m_buf_written_clean;
+	globalStats.bgWriterGlobalStats.maxwritten_clean += msg->m_maxwritten_clean;
+	globalStats.bgWriterGlobalStats.buf_written_backend += msg->m_buf_written_backend;
+	globalStats.bgWriterGlobalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
+	globalStats.bgWriterGlobalStats.buf_alloc += msg->m_buf_alloc;
+}
+
+/* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.walWriterGlobalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
 }
 
 /* ----------
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 8359da6..dedec0d 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -290,6 +290,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0533cd6..df9f1d8 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -98,6 +98,7 @@ extern Datum pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_numscans(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS);
@@ -119,6 +120,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1409,69 +1412,75 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.timed_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->requested_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.requested_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_clean);
 }
 
 Datum
 pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.maxwritten_clean);
 }
 
 Datum
 pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_write_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_write_time);
 }
 
 Datum
 pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_sync_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_sync_time);
 }
 
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->bgWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
 pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_backend);
 }
 
 Datum
 pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_fsync_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_fsync_backend);
 }
 
 Datum
 pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_alloc);
+}
+
+Datum
+pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->walWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
@@ -1711,3 +1720,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->walWriterGlobalStats.xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f03dd0b..add47dc 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2709,6 +2709,8 @@ DATA(insert OID = 3063 ( pg_stat_get_buf_fsync_backend PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: number of backend buffer writes that did their own fsync");
 DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_buf_alloc _null_ _null_ _null_ ));
 DESCR("statistics: number of buffer allocations");
+DATA(insert OID = 2860 ( pg_stat_get_wal_stat_reset_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 1184 "" _null_ _null_ _null_ _null_	pg_stat_get_wal_stat_reset_time _null_ _null_ _null_ ));
+DESCR("statistics: last reset for the wal");
 
 DATA(insert OID = 2978 (  pg_stat_get_function_calls		PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls");
@@ -2753,6 +2755,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3766 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fb242e4..1213964 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -373,6 +375,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -500,6 +513,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -608,12 +622,8 @@ typedef struct PgStat_StatFuncEntry
 } PgStat_StatFuncEntry;
 
 
-/*
- * Global statistics kept in the stats collector
- */
-typedef struct PgStat_GlobalStats
+typedef struct PgStat_BgWriterGlobalStats
 {
-	TimestampTz stats_timestamp;	/* time of stats file update */
 	PgStat_Counter timed_checkpoints;
 	PgStat_Counter requested_checkpoints;
 	PgStat_Counter checkpoint_write_time;		/* times in milliseconds */
@@ -625,6 +635,22 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
 	TimestampTz stat_reset_timestamp;
+} PgStat_BgWriterGlobalStats;
+
+typedef struct PgStat_WalWriterGlobalStats
+{
+	PgStat_Counter xlog_dirty_writes;
+	TimestampTz stat_reset_timestamp;
+} PgStat_WalWriterGlobalStats;
+
+/*
+ * Global statistics kept in the stats collector
+ */
+typedef struct PgStat_GlobalStats
+{
+	TimestampTz stats_timestamp;    /* time of stats file update */
+	PgStat_BgWriterGlobalStats bgWriterGlobalStats;
+	PgStat_WalWriterGlobalStats walWriterGlobalStats;
 } PgStat_GlobalStats;
 
 
@@ -733,6 +759,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -861,6 +889,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 8f24c51..4074c61 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1775,6 +1775,8 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |     pg_stat_all_tables.autoanalyze_count                                                                                                                                                                       +
                                  |    FROM pg_stat_all_tables                                                                                                                                                                                     +
                                  |   WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
+ pg_stat_walwriter               |  SELECT pg_stat_get_xlog_dirty_writes() AS dirty_writes,                                                                                                                                                       +
+                                 |     pg_stat_get_wal_stat_reset_time() AS stats_reset;
  pg_stat_xact_all_tables         |  SELECT c.oid AS relid,                                                                                                                                                                                        +
                                  |     n.nspname AS schemaname,                                                                                                                                                                                   +
                                  |     c.relname,                                                                                                                                                                                                 +
@@ -2142,7 +2144,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |    FROM tv;
  tvvmv                           |  SELECT tvvm.grandtot                                                                                                                                                                                          +
                                  |    FROM tvvm;
-(64 rows)
+(65 rows)
 
 SELECT tablename, rulename, definition FROM pg_rules
 	ORDER BY tablename, rulename;
test_xlogdirtywrite.shtext/plain; charset=Shift_JIS; name=test_xlogdirtywrite.shDownload
#28Peter Eisentraut
peter_e@gmx.net
In reply to: Satoshi Nagayasu (#27)
Re: New statistics for WAL buffer dirty writes

On 9/6/13 11:32 PM, Satoshi Nagayasu wrote:

The revised patch for wal buffer statistics is attached.
A test script is also attached. Please take a look.

You have duplicate OIDs. Run the script duplicate_oids to find them.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#29Peter Geoghegan
pg@heroku.com
In reply to: Peter Eisentraut (#28)
Re: New statistics for WAL buffer dirty writes

On Mon, Sep 9, 2013 at 2:43 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

You have duplicate OIDs. Run the script duplicate_oids to find them.

Are you considering picking up the script that Andrew wrote to
automate that as part of the build? I wonder why that didn't end up
going anywhere.

--
Peter Geoghegan

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#30Peter Eisentraut
peter_e@gmx.net
In reply to: Peter Geoghegan (#29)
Re: New statistics for WAL buffer dirty writes

On Mon, 2013-09-09 at 14:51 -0700, Peter Geoghegan wrote:

On Mon, Sep 9, 2013 at 2:43 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

You have duplicate OIDs. Run the script duplicate_oids to find them.

Are you considering picking up the script that Andrew wrote to
automate that as part of the build? I wonder why that didn't end up
going anywhere.

It is automated. Andrew's rewrite is still worth considering, and I had
planned to do that, but it doesn't provide any functionality we don't
already have.

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#31Peter Geoghegan
pg@heroku.com
In reply to: Peter Eisentraut (#30)
Re: New statistics for WAL buffer dirty writes

On Mon, Sep 9, 2013 at 6:05 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

It is automated.

Oh, yeah. I see that the maintainer-check target does that. I should
probably get into the habit of using targets other than
check/installcheck, as you recently demonstrated.

--
Peter Geoghegan

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#32Satoshi Nagayasu
snaga@uptime.jp
In reply to: Peter Eisentraut (#28)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

Thanks for checking. Revised one attached.

(2013/09/10 6:43), Peter Eisentraut wrote:

On 9/6/13 11:32 PM, Satoshi Nagayasu wrote:

The revised patch for wal buffer statistics is attached.
A test script is also attached. Please take a look.

You have duplicate OIDs. Run the script duplicate_oids to find them.

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite_v6.difftext/plain; charset=Shift_JIS; name=xlogdirtywrite_v6.diffDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
         results in most cases.
        </para>
 
+       <para>
+        When you see pg_stat_walwriter.dirty_write, which means number
+        of buffer flushing at buffer full, is continuously increasing
+        in your running server, you may need to enlarge this buffer
+        size.
+       </para>
+
       </listitem>
      </varlistentry>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
      </row>
 
      <row>
+      <entry><structname>pg_stat_walwriter</><indexterm><primary>pg_stat_walwriter</primary></indexterm></entry>
+      <entry>One row only, showing statistics about the wal writer
+       process's activity. See <xref linkend="pg-stat-walwriter-view">
+       for details.
+     </entry>
+     </row>
+
+     <row>
       <entry><structname>pg_stat_database</><indexterm><primary>pg_stat_database</primary></indexterm></entry>
       <entry>One row per database, showing database-wide statistics. See
        <xref linkend="pg-stat-database-view"> for details.
@@ -735,6 +743,39 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    single row, containing global data for the cluster.
   </para>
 
+  <table id="pg-stat-walwriter-view" xreflabel="pg_stat_walwriter">
+   <title><structname>pg_stat_walwriter</structname> View</title>
+
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>
+     </row>
+     <row>
+      <entry><structfield>stats_reset</></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>Time at which these statistics were last reset</entry>
+     </row>
+    </tbody>
+    </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_walwriter</structname> view will always have a
+   single row, containing global data for the cluster.
+  </para>
+
   <table id="pg-stat-database-view" xreflabel="pg_stat_database">
    <title><structname>pg_stat_database</structname> View</title>
    <tgroup cols="3">
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index dc47c47..d0e85c9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2467,6 +2467,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
 					WriteRqst.Write = OldPageRqstPtr;
 					WriteRqst.Flush = 0;
 					XLogWrite(WriteRqst, false);
+					WalWriterStats.m_xlog_dirty_writes++;
 					LWLockRelease(WALWriteLock);
 					TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 				}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 575a40f..12a2ed0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -686,6 +686,11 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+    SELECT
+        pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+        pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b5ce2f6..8c56af5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -129,6 +129,14 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter statistics counter.
+ * This counter is incremented by each XLogWrite call,
+ * both in the wal writer process and each backend.
+ * And then, sent to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -293,6 +301,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -820,6 +829,9 @@ pgstat_report_stat(bool force)
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
+
+	/* Now, send wal buffer flush statistics */
+	pgstat_send_walwriter();
 }
 
 /*
@@ -1249,11 +1261,13 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"bgwriter\".")));
+				 errhint("Target must be \"bgwriter\" or \"walwriter\".")));
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
@@ -3055,6 +3069,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send walwriter statistics to the collector
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* We assume this initializes to zeroes */
+	static const PgStat_MsgBgWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(PgStat_MsgWalWriter)) == 0)
+		return;
+
+	/*
+	 * Prepare and send the message
+	 */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3270,6 +3316,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -3825,7 +3875,8 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
 	 * Set the current timestamp (will be kept only in case we can't load an
 	 * existing statsfile).
 	 */
-	globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 
 	/*
 	 * Try to open the stats file. If it doesn't exist, the backends simply
@@ -4723,8 +4774,23 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
 		/* Reset the global background writer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
-		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+		globalStats.bgWriterGlobalStats.timed_checkpoints       = 0;
+		globalStats.bgWriterGlobalStats.requested_checkpoints   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_write_time   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_sync_time    = 0;
+		globalStats.bgWriterGlobalStats.buf_written_checkpoints = 0;
+		globalStats.bgWriterGlobalStats.buf_written_clean       = 0;
+		globalStats.bgWriterGlobalStats.maxwritten_clean        = 0;
+		globalStats.bgWriterGlobalStats.buf_written_backend     = 0;
+		globalStats.bgWriterGlobalStats.buf_fsync_backend       = 0;
+		globalStats.bgWriterGlobalStats.buf_alloc               = 0;
+		globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		globalStats.walWriterGlobalStats.xlog_dirty_writes = 0;
+		globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 	}
 
 	/*
@@ -4865,16 +4931,28 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
 static void
 pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 {
-	globalStats.timed_checkpoints += msg->m_timed_checkpoints;
-	globalStats.requested_checkpoints += msg->m_requested_checkpoints;
-	globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
-	globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
-	globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
-	globalStats.buf_written_clean += msg->m_buf_written_clean;
-	globalStats.maxwritten_clean += msg->m_maxwritten_clean;
-	globalStats.buf_written_backend += msg->m_buf_written_backend;
-	globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
-	globalStats.buf_alloc += msg->m_buf_alloc;
+	globalStats.bgWriterGlobalStats.timed_checkpoints += msg->m_timed_checkpoints;
+	globalStats.bgWriterGlobalStats.requested_checkpoints += msg->m_requested_checkpoints;
+	globalStats.bgWriterGlobalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
+	globalStats.bgWriterGlobalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
+	globalStats.bgWriterGlobalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
+	globalStats.bgWriterGlobalStats.buf_written_clean += msg->m_buf_written_clean;
+	globalStats.bgWriterGlobalStats.maxwritten_clean += msg->m_maxwritten_clean;
+	globalStats.bgWriterGlobalStats.buf_written_backend += msg->m_buf_written_backend;
+	globalStats.bgWriterGlobalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
+	globalStats.bgWriterGlobalStats.buf_alloc += msg->m_buf_alloc;
+}
+
+/* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.walWriterGlobalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
 }
 
 /* ----------
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 8359da6..dedec0d 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -290,6 +290,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0533cd6..df9f1d8 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -98,6 +98,7 @@ extern Datum pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_numscans(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS);
@@ -119,6 +120,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1409,69 +1412,75 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.timed_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->requested_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.requested_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_clean);
 }
 
 Datum
 pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.maxwritten_clean);
 }
 
 Datum
 pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_write_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_write_time);
 }
 
 Datum
 pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_sync_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_sync_time);
 }
 
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->bgWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
 pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_backend);
 }
 
 Datum
 pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_fsync_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_fsync_backend);
 }
 
 Datum
 pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_alloc);
+}
+
+Datum
+pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->walWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
@@ -1711,3 +1720,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->walWriterGlobalStats.xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f03dd0b..b1f9c54 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2709,6 +2709,8 @@ DATA(insert OID = 3063 ( pg_stat_get_buf_fsync_backend PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: number of backend buffer writes that did their own fsync");
 DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_buf_alloc _null_ _null_ _null_ ));
 DESCR("statistics: number of buffer allocations");
+DATA(insert OID = 3178 ( pg_stat_get_wal_stat_reset_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 1184 "" _null_ _null_ _null_ _null_	pg_stat_get_wal_stat_reset_time _null_ _null_ _null_ ));
+DESCR("statistics: last reset for the wal");
 
 DATA(insert OID = 2978 (  pg_stat_get_function_calls		PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls");
@@ -2753,6 +2755,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3179 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f f f v 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fb242e4..1213964 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -373,6 +375,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -500,6 +513,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -608,12 +622,8 @@ typedef struct PgStat_StatFuncEntry
 } PgStat_StatFuncEntry;
 
 
-/*
- * Global statistics kept in the stats collector
- */
-typedef struct PgStat_GlobalStats
+typedef struct PgStat_BgWriterGlobalStats
 {
-	TimestampTz stats_timestamp;	/* time of stats file update */
 	PgStat_Counter timed_checkpoints;
 	PgStat_Counter requested_checkpoints;
 	PgStat_Counter checkpoint_write_time;		/* times in milliseconds */
@@ -625,6 +635,22 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
 	TimestampTz stat_reset_timestamp;
+} PgStat_BgWriterGlobalStats;
+
+typedef struct PgStat_WalWriterGlobalStats
+{
+	PgStat_Counter xlog_dirty_writes;
+	TimestampTz stat_reset_timestamp;
+} PgStat_WalWriterGlobalStats;
+
+/*
+ * Global statistics kept in the stats collector
+ */
+typedef struct PgStat_GlobalStats
+{
+	TimestampTz stats_timestamp;    /* time of stats file update */
+	PgStat_BgWriterGlobalStats bgWriterGlobalStats;
+	PgStat_WalWriterGlobalStats walWriterGlobalStats;
 } PgStat_GlobalStats;
 
 
@@ -733,6 +759,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -861,6 +889,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 8f24c51..4074c61 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1775,6 +1775,8 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |     pg_stat_all_tables.autoanalyze_count                                                                                                                                                                       +
                                  |    FROM pg_stat_all_tables                                                                                                                                                                                     +
                                  |   WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
+ pg_stat_walwriter               |  SELECT pg_stat_get_xlog_dirty_writes() AS dirty_writes,                                                                                                                                                       +
+                                 |     pg_stat_get_wal_stat_reset_time() AS stats_reset;
  pg_stat_xact_all_tables         |  SELECT c.oid AS relid,                                                                                                                                                                                        +
                                  |     n.nspname AS schemaname,                                                                                                                                                                                   +
                                  |     c.relname,                                                                                                                                                                                                 +
@@ -2142,7 +2144,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |    FROM tv;
  tvvmv                           |  SELECT tvvm.grandtot                                                                                                                                                                                          +
                                  |    FROM tvvm;
-(64 rows)
+(65 rows)
 
 SELECT tablename, rulename, definition FROM pg_rules
 	ORDER BY tablename, rulename;
#33Peter Eisentraut
peter_e@gmx.net
In reply to: Satoshi Nagayasu (#32)
Re: New statistics for WAL buffer dirty writes

On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:

Thanks for checking. Revised one attached.

Please fix compiler warning:

walwriter.c: In function ‘WalWriterMain’:
walwriter.c:293:3: warning: implicit declaration of function
‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#34Satoshi Nagayasu
snaga@uptime.jp
In reply to: Peter Eisentraut (#33)
1 attachment(s)
Re: New statistics for WAL buffer dirty writes

(2013/09/10 22:48), Peter Eisentraut wrote:

On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:

Thanks for checking. Revised one attached.

Please fix compiler warning:

walwriter.c: In function ‘WalWriterMain’:
walwriter.c:293:3: warning: implicit declaration of function
‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]

Thanks. Fixed.

--
Satoshi Nagayasu <snaga@uptime.jp>
Uptime Technologies, LLC. http://www.uptime.jp

Attachments:

xlogdirtywrite_v7.difftext/plain; charset=Shift_JIS; name=xlogdirtywrite_v7.diffDownload
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
         results in most cases.
        </para>
 
+       <para>
+        When you see pg_stat_walwriter.dirty_write, which means number
+        of buffer flushing at buffer full, is continuously increasing
+        in your running server, you may need to enlarge this buffer
+        size.
+       </para>
+
       </listitem>
      </varlistentry>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
      </row>
 
      <row>
+      <entry><structname>pg_stat_walwriter</><indexterm><primary>pg_stat_walwriter</primary></indexterm></entry>
+      <entry>One row only, showing statistics about the wal writer
+       process's activity. See <xref linkend="pg-stat-walwriter-view">
+       for details.
+     </entry>
+     </row>
+
+     <row>
       <entry><structname>pg_stat_database</><indexterm><primary>pg_stat_database</primary></indexterm></entry>
       <entry>One row per database, showing database-wide statistics. See
        <xref linkend="pg-stat-database-view"> for details.
@@ -735,6 +743,39 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    single row, containing global data for the cluster.
   </para>
 
+  <table id="pg-stat-walwriter-view" xreflabel="pg_stat_walwriter">
+   <title><structname>pg_stat_walwriter</structname> View</title>
+
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>dirty_writes</></entry>
+      <entry><type>bigint</type></entry>
+      <entry>Number of dirty writes, which means flushing wal buffers
+       because of its full.</entry>
+     </row>
+     <row>
+      <entry><structfield>stats_reset</></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>Time at which these statistics were last reset</entry>
+     </row>
+    </tbody>
+    </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_walwriter</structname> view will always have a
+   single row, containing global data for the cluster.
+  </para>
+
   <table id="pg-stat-database-view" xreflabel="pg_stat_database">
    <title><structname>pg_stat_database</structname> View</title>
    <tgroup cols="3">
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index dc47c47..d0e85c9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2467,6 +2467,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
 					WriteRqst.Write = OldPageRqstPtr;
 					WriteRqst.Flush = 0;
 					XLogWrite(WriteRqst, false);
+					WalWriterStats.m_xlog_dirty_writes++;
 					LWLockRelease(WALWriteLock);
 					TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
 				}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 575a40f..12a2ed0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -686,6 +686,11 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+    SELECT
+        pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+        pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b5ce2f6..8c56af5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -129,6 +129,14 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter statistics counter.
+ * This counter is incremented each time a dirty WAL buffer has to be
+ * written out because the WAL buffers are full, both in the wal writer
+ * process and in each backend, and is then sent to the stats collector.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* ----------
  * Local data
  * ----------
@@ -293,6 +301,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
@@ -820,6 +829,9 @@ pgstat_report_stat(bool force)
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
+
+	/* Now, send wal buffer flush statistics */
+	pgstat_send_walwriter();
 }
 
 /*
@@ -1249,11 +1261,13 @@ pgstat_reset_shared_counters(const char *target)
 
 	if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
+	else if (strcmp(target, "walwriter") == 0)
+		msg.m_resettarget = RESET_WALWRITER;
 	else
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"bgwriter\".")));
+				 errhint("Target must be \"bgwriter\" or \"walwriter\".")));
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
@@ -3055,6 +3069,38 @@ pgstat_send_bgwriter(void)
 	MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
 }
 
+/* ----------
+ * pgstat_send_walwriter() -
+ *
+ *		Send the counters accumulated in WalWriterStats to the stats
+ *		collector and zero the local copy afterwards.  Nothing is sent
+ *		while WalWriterStats is still all zeroes.
+ * ----------
+ */
+void
+pgstat_send_walwriter(void)
+{
+	/* We assume this initializes to zeroes; type must match WalWriterStats */
+	static const PgStat_MsgWalWriter all_zeroes;
+
+	/*
+	 * This function can be called even if nothing at all has happened. In
+	 * this case, avoid sending a completely empty message to the stats
+	 * collector.
+	 */
+	if (memcmp(&WalWriterStats, &all_zeroes, sizeof(PgStat_MsgWalWriter)) == 0)
+		return;
+
+	/* Prepare and send the message */
+	pgstat_setheader(&WalWriterStats.m_hdr, PGSTAT_MTYPE_WALWRITER);
+	pgstat_send(&WalWriterStats, sizeof(WalWriterStats));
+
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&WalWriterStats, 0, sizeof(WalWriterStats));
+}
+
 
 /* ----------
  * PgstatCollectorMain() -
@@ -3270,6 +3316,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
 					break;
 
+				case PGSTAT_MTYPE_WALWRITER:
+					pgstat_recv_walwriter((PgStat_MsgWalWriter *) &msg, len);
+					break;
+
 				case PGSTAT_MTYPE_FUNCSTAT:
 					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
 					break;
@@ -3825,7 +3875,8 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
 	 * Set the current timestamp (will be kept only in case we can't load an
 	 * existing statsfile).
 	 */
-	globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 
 	/*
 	 * Try to open the stats file. If it doesn't exist, the backends simply
@@ -4723,8 +4774,23 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
 		/* Reset the global background writer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
-		globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+		globalStats.bgWriterGlobalStats.timed_checkpoints       = 0;
+		globalStats.bgWriterGlobalStats.requested_checkpoints   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_write_time   = 0;
+		globalStats.bgWriterGlobalStats.checkpoint_sync_time    = 0;
+		globalStats.bgWriterGlobalStats.buf_written_checkpoints = 0;
+		globalStats.bgWriterGlobalStats.buf_written_clean       = 0;
+		globalStats.bgWriterGlobalStats.maxwritten_clean        = 0;
+		globalStats.bgWriterGlobalStats.buf_written_backend     = 0;
+		globalStats.bgWriterGlobalStats.buf_fsync_backend       = 0;
+		globalStats.bgWriterGlobalStats.buf_alloc               = 0;
+		globalStats.bgWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
+	}
+	else if (msg->m_resettarget == RESET_WALWRITER)
+	{
+		/* Reset the global walwriter statistics for the cluster. */
+		globalStats.walWriterGlobalStats.xlog_dirty_writes = 0;
+		globalStats.walWriterGlobalStats.stat_reset_timestamp = GetCurrentTimestamp();
 	}
 
 	/*
@@ -4865,16 +4931,28 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
 static void
 pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 {
-	globalStats.timed_checkpoints += msg->m_timed_checkpoints;
-	globalStats.requested_checkpoints += msg->m_requested_checkpoints;
-	globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
-	globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
-	globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
-	globalStats.buf_written_clean += msg->m_buf_written_clean;
-	globalStats.maxwritten_clean += msg->m_maxwritten_clean;
-	globalStats.buf_written_backend += msg->m_buf_written_backend;
-	globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
-	globalStats.buf_alloc += msg->m_buf_alloc;
+	globalStats.bgWriterGlobalStats.timed_checkpoints += msg->m_timed_checkpoints;
+	globalStats.bgWriterGlobalStats.requested_checkpoints += msg->m_requested_checkpoints;
+	globalStats.bgWriterGlobalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
+	globalStats.bgWriterGlobalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
+	globalStats.bgWriterGlobalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
+	globalStats.bgWriterGlobalStats.buf_written_clean += msg->m_buf_written_clean;
+	globalStats.bgWriterGlobalStats.maxwritten_clean += msg->m_maxwritten_clean;
+	globalStats.bgWriterGlobalStats.buf_written_backend += msg->m_buf_written_backend;
+	globalStats.bgWriterGlobalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
+	globalStats.bgWriterGlobalStats.buf_alloc += msg->m_buf_alloc;
+}
+
+/* ----------
+ * pgstat_recv_walwriter() -
+ *
+ *	Process a WALWRITER message.
+ * ----------
+ */
+static void
+pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len)
+{
+	globalStats.walWriterGlobalStats.xlog_dirty_writes += msg->m_xlog_dirty_writes;
 }
 
 /* ----------
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 8359da6..a6cdee2 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -49,6 +49,7 @@
 #include "access/xlog.h"
 #include "libpq/pqsignal.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/walwriter.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
@@ -290,6 +291,8 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		pgstat_send_walwriter();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0533cd6..df9f1d8 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -98,6 +98,7 @@ extern Datum pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_buf_alloc(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_numscans(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS);
@@ -119,6 +120,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1409,69 +1412,75 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.timed_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->requested_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.requested_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_checkpoints);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_clean);
 }
 
 Datum
 pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_clean);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.maxwritten_clean);
 }
 
 Datum
 pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_write_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_write_time);
 }
 
 Datum
 pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 {
 	/* time is already in msec, just convert to double for presentation */
-	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_sync_time);
+	PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgWriterGlobalStats.checkpoint_sync_time);
 }
 
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->bgWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
 pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_written_backend);
 }
 
 Datum
 pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_fsync_backend);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_fsync_backend);
 }
 
 Datum
 pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
+	PG_RETURN_INT64(pgstat_fetch_global()->bgWriterGlobalStats.buf_alloc);
+}
+
+Datum
+pg_stat_get_wal_stat_reset_time(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->walWriterGlobalStats.stat_reset_timestamp);
 }
 
 Datum
@@ -1711,3 +1720,9 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_xlog_dirty_writes(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_global()->walWriterGlobalStats.xlog_dirty_writes);
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f03dd0b..b1f9c54 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2709,6 +2709,8 @@ DATA(insert OID = 3063 ( pg_stat_get_buf_fsync_backend PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: number of backend buffer writes that did their own fsync");
 DATA(insert OID = 2859 ( pg_stat_get_buf_alloc			PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_buf_alloc _null_ _null_ _null_ ));
 DESCR("statistics: number of buffer allocations");
+DATA(insert OID = 3178 ( pg_stat_get_wal_stat_reset_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 1184 "" _null_ _null_ _null_ _null_	pg_stat_get_wal_stat_reset_time _null_ _null_ _null_ ));
+DESCR("statistics: last reset for the wal");
 
 DATA(insert OID = 2978 (  pg_stat_get_function_calls		PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls");
@@ -2753,6 +2755,9 @@ DESCR("statistics: reset collected statistics for a single table or index in the
 DATA(insert OID = 3777 (  pg_stat_reset_single_function_counters	PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "26" _null_ _null_ _null_ _null_	pg_stat_reset_single_function_counters _null_ _null_ _null_ ));
 DESCR("statistics: reset collected statistics for a single function in the current database");
 
+DATA(insert OID = 3179 (  pg_stat_get_xlog_dirty_writes  PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_xlog_dirty_writes _null_ _null_ _null_ ));
+DESCR("statistics: get xlog dirty buffer write statistics");
+
 DATA(insert OID = 3163 (  pg_trigger_depth				PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_trigger_depth _null_ _null_ _null_ ));
 DESCR("current trigger depth");
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fb242e4..1213964 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -45,6 +45,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_VACUUM,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_BGWRITER,
+	PGSTAT_MTYPE_WALWRITER,
 	PGSTAT_MTYPE_FUNCSTAT,
 	PGSTAT_MTYPE_FUNCPURGE,
 	PGSTAT_MTYPE_RECOVERYCONFLICT,
@@ -102,7 +103,8 @@ typedef struct PgStat_TableCounts
 /* Possible targets for resetting cluster-wide shared values */
 typedef enum PgStat_Shared_Reset_Target
 {
-	RESET_BGWRITER
+	RESET_BGWRITER,
+	RESET_WALWRITER
 } PgStat_Shared_Reset_Target;
 
 /* Possible object types for resetting single counters */
@@ -373,6 +375,17 @@ typedef struct PgStat_MsgBgWriter
 } PgStat_MsgBgWriter;
 
 /* ----------
+ * PgStat_MsgWalWriter			Sent by the walwriter to update statistics.
+ * ----------
+ */
+typedef struct PgStat_MsgWalWriter
+{
+	PgStat_MsgHdr m_hdr;
+
+	PgStat_Counter m_xlog_dirty_writes;
+} PgStat_MsgWalWriter;
+
+/* ----------
  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
  * ----------
  */
@@ -500,6 +513,7 @@ typedef union PgStat_Msg
 	PgStat_MsgVacuum msg_vacuum;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgBgWriter msg_bgwriter;
+	PgStat_MsgWalWriter msg_walwriter;
 	PgStat_MsgFuncstat msg_funcstat;
 	PgStat_MsgFuncpurge msg_funcpurge;
 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
@@ -608,12 +622,8 @@ typedef struct PgStat_StatFuncEntry
 } PgStat_StatFuncEntry;
 
 
-/*
- * Global statistics kept in the stats collector
- */
-typedef struct PgStat_GlobalStats
+typedef struct PgStat_BgWriterGlobalStats
 {
-	TimestampTz stats_timestamp;	/* time of stats file update */
 	PgStat_Counter timed_checkpoints;
 	PgStat_Counter requested_checkpoints;
 	PgStat_Counter checkpoint_write_time;		/* times in milliseconds */
@@ -625,6 +635,22 @@ typedef struct PgStat_GlobalStats
 	PgStat_Counter buf_fsync_backend;
 	PgStat_Counter buf_alloc;
 	TimestampTz stat_reset_timestamp;
+} PgStat_BgWriterGlobalStats;
+
+typedef struct PgStat_WalWriterGlobalStats
+{
+	PgStat_Counter xlog_dirty_writes;
+	TimestampTz stat_reset_timestamp;
+} PgStat_WalWriterGlobalStats;
+
+/*
+ * Global statistics kept in the stats collector
+ */
+typedef struct PgStat_GlobalStats
+{
+	TimestampTz stats_timestamp;    /* time of stats file update */
+	PgStat_BgWriterGlobalStats bgWriterGlobalStats;
+	PgStat_WalWriterGlobalStats walWriterGlobalStats;
 } PgStat_GlobalStats;
 
 
@@ -733,6 +759,8 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+extern PgStat_MsgWalWriter WalWriterStats;
+
 /*
  * Updated by pgstat_count_buffer_*_time macros
  */
@@ -861,6 +889,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 						  void *recdata, uint32 len);
 
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_walwriter(void);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 8f24c51..4074c61 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1775,6 +1775,8 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |     pg_stat_all_tables.autoanalyze_count                                                                                                                                                                       +
                                  |    FROM pg_stat_all_tables                                                                                                                                                                                     +
                                  |   WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
+ pg_stat_walwriter               |  SELECT pg_stat_get_xlog_dirty_writes() AS dirty_writes,                                                                                                                                                       +
+                                 |     pg_stat_get_wal_stat_reset_time() AS stats_reset;
  pg_stat_xact_all_tables         |  SELECT c.oid AS relid,                                                                                                                                                                                        +
                                  |     n.nspname AS schemaname,                                                                                                                                                                                   +
                                  |     c.relname,                                                                                                                                                                                                 +
@@ -2142,7 +2144,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
                                  |    FROM tv;
  tvvmv                           |  SELECT tvvm.grandtot                                                                                                                                                                                          +
                                  |    FROM tvvm;
-(64 rows)
+(65 rows)
 
 SELECT tablename, rulename, definition FROM pg_rules
 	ORDER BY tablename, rulename;
#35Fujii Masao
masao.fujii@gmail.com
In reply to: Satoshi Nagayasu (#34)
Re: New statistics for WAL buffer dirty writes

On Wed, Sep 11, 2013 at 12:43 PM, Satoshi Nagayasu <snaga@uptime.jp> wrote:

(2013/09/10 22:48), Peter Eisentraut wrote:

On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:

Thanks for checking. Revised one attached.

Please fix compiler warning:

walwriter.c: In function ‘WalWriterMain’:
walwriter.c:293:3: warning: implicit declaration of function
‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]

Thanks. Fixed.

The patch looks good to me. I have some comments:

The description of pg_stat_reset_shared() should mention
pg_stat_walwriter in the document.

Should we implement something like pg_stat_reset_shared('all') so that we can
easily reset all cluster-wide statistics counters to zero?

Some background workers may also write WAL because the WAL buffer is full, so
it seems you need to change those processes so that they, too, can increment
the xlog_dirty_writes counter.

Regards,

--
Fujii Masao

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers