Refactor calculations to use instr_time

Started by Nazir Bilal Yavuzalmost 3 years ago11 messages
#1Nazir Bilal Yavuz
byavuz81@gmail.com
1 attachment(s)

Hi,

In 'instr_time.h' it is stated that:

* When summing multiple measurements, it's recommended to leave the
* running sum in instr_time form (ie, use INSTR_TIME_ADD or
* INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.

So, I refactored 'PendingWalStats' to use 'instr_time' instead of
'PgStat_Counter' while accumulating 'wal_write_time' and
'wal_sync_time'. Also, I refactored some calculations to use
'INSTR_TIME_ACCUM_DIFF' instead of using 'INSTR_TIME_SUBTRACT' and
'INSTR_TIME_ADD'. What do you think?

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v1-0001-Refactor-instr_time-calculations.patchtext/x-diff; charset=UTF-8; name=v1-0001-Refactor-instr_time-calculations.patchDownload
From 5216c70da53ff32b8e9898595445ad6f4880b06d Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 2 Feb 2023 15:06:48 +0300
Subject: [PATCH v1] Refactor instr_time calculations

---
 src/backend/access/transam/xlog.c       |  6 ++----
 src/backend/storage/file/buffile.c      |  6 ++----
 src/backend/utils/activity/pgstat_wal.c | 27 ++++++++++++-------------
 src/include/pgstat.h                    | 17 +++++++++++++++-
 4 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d1..3c35dc1ca23 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8201,8 +8200,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3b..e55f86b675e 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..56211fbc61a 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalUsage PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -89,26 +89,25 @@ pgstat_flush_wal(bool nowait)
 	 * previous counters from the current ones.
 	 */
 	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, diff);
+	WALSTAT_ACC(wal_fpi, diff);
+	WALSTAT_ACC(wal_bytes, diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
 #undef WALSTAT_ACC
-
 	LWLockRelease(&stats_shmem->lock);
 
 	/*
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f3..295c5eabf38 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,21 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/* Created for accumulating wal_write_time and wal_sync_time as a instr_time
+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalUsage is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalUsage
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalUsage;
+
 
 /*
  * Functions in pgstat.c
@@ -758,7 +773,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalUsage PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
-- 
2.39.1

#2Andres Freund
andres@anarazel.de
In reply to: Nazir Bilal Yavuz (#1)
Re: Refactor calculations to use instr_time

Hi,

On 2023-02-16 16:19:02 +0300, Nazir Bilal Yavuz wrote:

What do you think?

Here's a small review:

+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, diff);
+	WALSTAT_ACC(wal_fpi, diff);
+	WALSTAT_ACC(wal_bytes, diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
#undef WALSTAT_ACC
-
LWLockRelease(&stats_shmem->lock);

WALSTAT_ACC is undefined, but WALLSTAT_ACC_INSTR_TIME_TYPE isn't.

I'd not remove the newline before LWLockRelease().

/*
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f3..295c5eabf38 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,21 @@ typedef struct PgStat_WalStats
TimestampTz stat_reset_timestamp;
} PgStat_WalStats;

+/* Created for accumulating wal_write_time and wal_sync_time as a
instr_time

Minor code-formatting point: In postgres we don't put code in the same line as
a multi-line comment starting with the /*. So either

/* single line comment */
or
/*
* multi line
* comment
*/

+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalUsage is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalUsage
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalUsage;
+

I wonder if we should try to put pgWalUsage in here. But that's probably
better done as a separate patch.

Greetings,

Andres Freund

#3Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Andres Freund (#2)
1 attachment(s)
Re: Refactor calculations to use instr_time

Hi,

On 2/16/23 19:13, Andres Freund wrote:

+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, diff);
+	WALSTAT_ACC(wal_fpi, diff);
+	WALSTAT_ACC(wal_bytes, diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
#undef WALSTAT_ACC
-
LWLockRelease(&stats_shmem->lock);

WALSTAT_ACC is undefined, but WALLSTAT_ACC_INSTR_TIME_TYPE isn't.

I'd not remove the newline before LWLockRelease().

/*
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f3..295c5eabf38 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,21 @@ typedef struct PgStat_WalStats
TimestampTz stat_reset_timestamp;
} PgStat_WalStats;

+/* Created for accumulating wal_write_time and wal_sync_time as a
instr_time

Minor code-formatting point: In postgres we don't put code in the same line as
a multi-line comment starting with the /*. So either

/* single line comment */
or
/*
* multi line
* comment
*/

Thanks for the review. I updated the patch.

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v2-0001-Refactor-instr_time-calculations.patchtext/x-diff; charset=UTF-8; name=v2-0001-Refactor-instr_time-calculations.patchDownload
From e3723aca8b79b07190834b0cfd1440dbbf706862 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 2 Feb 2023 15:06:48 +0300
Subject: [PATCH v2] Refactor instr_time calculations

---
 src/backend/access/transam/xlog.c       |  6 ++----
 src/backend/storage/file/buffile.c      |  6 ++----
 src/backend/utils/activity/pgstat_wal.c | 27 +++++++++++++------------
 src/include/pgstat.h                    | 18 ++++++++++++++++-
 4 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d1..3c35dc1ca23 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8201,8 +8200,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3b..e55f86b675e 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..ab2869a0e24 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalUsage PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -89,24 +89,25 @@ pgstat_flush_wal(bool nowait)
 	 * previous counters from the current ones.
 	 */
 	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, diff);
+	WALSTAT_ACC(wal_fpi, diff);
+	WALSTAT_ACC(wal_bytes, diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
+#undef WALLSTAT_ACC_INSTR_TIME_TYPE
 #undef WALSTAT_ACC
 
 	LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f3..cc103df7d2e 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,22 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/*
+ * Created for accumulating wal_write_time and wal_sync_time as a instr_time
+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalUsage is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalUsage
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalUsage;
+
 
 /*
  * Functions in pgstat.c
@@ -758,7 +774,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalUsage PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
-- 
2.39.1

#4Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Nazir Bilal Yavuz (#3)
Re: Refactor calculations to use instr_time

At Fri, 17 Feb 2023 13:53:36 +0300, Nazir Bilal Yavuz <byavuz81@gmail.com> wrote in

Thanks for the review. I updated the patch.

 	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
...
+	WALSTAT_ACC(wal_records, diff);
+	WALSTAT_ACC(wal_fpi, diff);
+	WALSTAT_ACC(wal_bytes, diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);

The lifetime of the variable "diff" seems to be longer now. Wouldn't
it be clearer if we renamed it to something more meaningful, like
wal_usage_diff, WalUsageDiff or PendingWalUsage? Along those same
lines, it occurs to me that the new struct should be named
PgStat_PendingWalStats, instead of ..Usage. That change makes the name
of the type and the variable consistent.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#5Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Kyotaro Horiguchi (#4)
1 attachment(s)
Re: Refactor calculations to use instr_time

Hi,

Thanks for the review.

On Mon, 20 Feb 2023 at 06:01, Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote:

At Fri, 17 Feb 2023 13:53:36 +0300, Nazir Bilal Yavuz <byavuz81@gmail.com> wrote in

Thanks for the review. I updated the patch.

WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-       PendingWalStats.wal_records = diff.wal_records;
-       PendingWalStats.wal_fpi = diff.wal_fpi;
-       PendingWalStats.wal_bytes = diff.wal_bytes;
...
+       WALSTAT_ACC(wal_records, diff);
+       WALSTAT_ACC(wal_fpi, diff);
+       WALSTAT_ACC(wal_bytes, diff);
+       WALSTAT_ACC(wal_buffers_full, PendingWalStats);

The lifetime of the variable "diff" seems to be longer now. Wouldn't
it be clearer if we renamed it to something more meaningful, like
wal_usage_diff, WalUsageDiff or PendingWalUsage? Along those same
lines, it occurs to me that the new struct should be named
PgStat_PendingWalStats, instead of ..Usage. That change makes the name
of the type and the variable consistent.

I agree. The patch is updated.

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v3-0001-Refactor-instr_time-calculations.patchapplication/octet-stream; name=v3-0001-Refactor-instr_time-calculations.patchDownload
From f8a7e590b9bfb240bf807d32543192c9486d1d21 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 2 Feb 2023 15:06:48 +0300
Subject: [PATCH v3] Refactor instr_time calculations

---
 src/backend/access/transam/xlog.c       |  6 ++---
 src/backend/storage/file/buffile.c      |  6 ++---
 src/backend/utils/activity/pgstat_wal.c | 31 +++++++++++++------------
 src/include/pgstat.h                    | 18 +++++++++++++-
 4 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d..3c35dc1ca2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8201,8 +8200,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3..e55f86b675 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e..0979d1c269 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalStats PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -70,7 +70,7 @@ bool
 pgstat_flush_wal(bool nowait)
 {
 	PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
-	WalUsage	diff = {0};
+	WalUsage	wal_usage_diff = {0};
 
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 	Assert(pgStatLocal.shmem != NULL &&
@@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
 	 * Calculate how much WAL usage counters were increased by subtracting the
 	 * previous counters from the current ones.
 	 */
-	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
+#undef WALLSTAT_ACC_INSTR_TIME_TYPE
 #undef WALSTAT_ACC
 
 	LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f..4be6fdeaf5 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,22 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/*
+ * Created for accumulating wal_write_time and wal_sync_time as a instr_time
+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalStats is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalStats
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+
 
 /*
  * Functions in pgstat.c
@@ -758,7 +774,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
-- 
2.25.1

#6Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Nazir Bilal Yavuz (#5)
Re: Refactor calculations to use instr_time

At Tue, 21 Feb 2023 16:11:19 +0300, Nazir Bilal Yavuz <byavuz81@gmail.com> wrote in

I agree. The patch is updated.

Thanks, that part looks good to me. I'd like to provide some
additional comments.

PgStat_PendingStats should be included in typedefs.list.

+ * Created for accumulating wal_write_time and wal_sync_time as a instr_time
+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalStats is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalStats

IMHO, this comment looks somewhat off. Maybe we could try something
like the following instead?

This struct stores wal-related durations as instr_time, which makes it
easier to accumulate them without requiring type conversions. Then,
during stats flush, they will be moved into shared stats with type
conversions.

The aim of this patch is to keep using instr_time for accumulation.
So it seems like we could do the same refactoring for
pgStatBlockReadTime, pgStatBlockWriteTime, pgStatActiveTime and
pgStatTransactionIdleTime. What do you think - should we include this
additional refactoring in the same patch or make a separate one for
it?

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#7Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Kyotaro Horiguchi (#6)
1 attachment(s)
Re: Refactor calculations to use instr_time

Hi,

Thanks for the review.

On Wed, 22 Feb 2023 at 05:50, Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote:

PgStat_PendingStats should be included in typedefs.list.

Done.

+ * Created for accumulating wal_write_time and wal_sync_time as a instr_time
+ * but instr_time can't be used as a type where it ends up on-disk
+ * because its units may change. PgStat_WalStats type is used for
+ * in-memory/on-disk data. So, PgStat_PendingWalStats is created for
+ * accumulating intervals as a instr_time.
+ */
+typedef struct PgStat_PendingWalStats

IMHO, this comment looks somewhat off. Maybe we could try something
like the following instead?

This struct stores wal-related durations as instr_time, which makes it
easier to accumulate them without requiring type conversions. Then,
during stats flush, they will be moved into shared stats with type
conversions.

Done. And I think we should write why we didn't change
PgStat_WalStats's variable types and instead created a new struct.
Maybe we can explain it in the commit description?

The aim of this patch is to keep using instr_time for accumulation.
So it seems like we could do the same refactoring for
pgStatBlockReadTime, pgStatBlockWriteTime, pgStatActiveTime and
pgStatTransactionIdleTime. What do you think - should we include this
additional refactoring in the same patch or make a separate one for
it?

I tried a bit but it seems the required changes for additional
refactoring aren't small. So, I think we can create a separate patch
for these changes.

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v4-0001-Refactor-instr_time-calculations.patchapplication/octet-stream; name=v4-0001-Refactor-instr_time-calculations.patchDownload
From 9c5d7a96aed730e9d77496890742f8188d107343 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 2 Feb 2023 15:06:48 +0300
Subject: [PATCH v4] Refactor instr_time calculations

---
 src/backend/access/transam/xlog.c       |  6 ++---
 src/backend/storage/file/buffile.c      |  6 ++---
 src/backend/utils/activity/pgstat_wal.c | 31 +++++++++++++------------
 src/include/pgstat.h                    | 17 +++++++++++++-
 src/tools/pgindent/typedefs.list        |  1 +
 5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d..3c35dc1ca2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8201,8 +8200,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3..e55f86b675 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e..0979d1c269 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalStats PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -70,7 +70,7 @@ bool
 pgstat_flush_wal(bool nowait)
 {
 	PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
-	WalUsage	diff = {0};
+	WalUsage	wal_usage_diff = {0};
 
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 	Assert(pgStatLocal.shmem != NULL &&
@@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
 	 * Calculate how much WAL usage counters were increased by subtracting the
 	 * previous counters from the current ones.
 	 */
-	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+ 	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
+#undef WALLSTAT_ACC_INSTR_TIME_TYPE
 #undef WALSTAT_ACC
 
 	LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f..82dd7d6416 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,21 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/*
+ * This struct stores wal-related durations as instr_time, which makes it
+ * easier to accumulate them without requiring type conversions. Then,
+ * during stats flush, they will be moved into shared stats with type
+ * conversions.
+ */
+typedef struct PgStat_PendingWalStats
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+
 
 /*
  * Functions in pgstat.c
@@ -758,7 +773,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 22ea42c16b..5d28aa1ca9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2045,6 +2045,7 @@ PgStat_Kind
 PgStat_KindInfo
 PgStat_LocalState
 PgStat_PendingDroppedStatsItem
+PgStat_PendingWalStats
 PgStat_SLRUStats
 PgStat_ShmemControl
 PgStat_Snapshot
-- 
2.25.1

#8Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Nazir Bilal Yavuz (#7)
1 attachment(s)
Re: Refactor calculations to use instr_time

Hi,

There was a warning while applying the patch, v5 is attached.

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v5-0001-Refactor-instr_time-calculations.patchtext/x-diff; charset=US-ASCII; name=v5-0001-Refactor-instr_time-calculations.patchDownload
From dcd49e48a0784a95b8731df1c6ee7c3a612a8529 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 9 Mar 2023 15:35:38 +0300
Subject: [PATCH v5] Refactor instr_time calculations

In instr_time.h it is stated that:

* When summing multiple measurements, it's recommended to leave the
* running sum in instr_time form (ie, use INSTR_TIME_ADD or
* INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.

So, this patch aims to refactor 'PendingWalStats' to use 'instr_time' instead
of 'PgStat_Counter' while accumulating 'wal_write_time' and 'wal_sync_time'.

Also, some calculations are refactored to use 'INSTR_TIME_ACCUM_DIFF' instead
of using 'INSTR_TIME_SUBTRACT' and 'INSTR_TIME_ADD'.
---
 src/backend/access/transam/xlog.c       |  6 ++---
 src/backend/storage/file/buffile.c      |  6 ++---
 src/backend/utils/activity/pgstat_wal.c | 31 +++++++++++++------------
 src/include/pgstat.h                    | 17 +++++++++++++-
 src/tools/pgindent/typedefs.list        |  1 +
 5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 543d4d897ae..46821ad6056 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8204,8 +8203,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3b..e55f86b675e 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..58daae3fbd6 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalStats PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -70,7 +70,7 @@ bool
 pgstat_flush_wal(bool nowait)
 {
 	PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
-	WalUsage	diff = {0};
+	WalUsage	wal_usage_diff = {0};
 
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 	Assert(pgStatLocal.shmem != NULL &&
@@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
 	 * Calculate how much WAL usage counters were increased by subtracting the
 	 * previous counters from the current ones.
 	 */
-	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
+#undef WALLSTAT_ACC_INSTR_TIME_TYPE
 #undef WALSTAT_ACC
 
 	LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ede..5bbc55bb341 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -442,6 +442,21 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/*
+ * This struct stores wal-related durations as instr_time, which makes it
+ * easier to accumulate them without requiring type conversions. Then,
+ * during stats flush, they will be moved into shared stats with type
+ * conversions.
+ */
+typedef struct PgStat_PendingWalStats
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+
 
 /*
  * Functions in pgstat.c
@@ -755,7 +770,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 86a9303bf56..68eb52886fa 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2049,6 +2049,7 @@ PgStat_Kind
 PgStat_KindInfo
 PgStat_LocalState
 PgStat_PendingDroppedStatsItem
+PgStat_PendingWalStats
 PgStat_SLRUStats
 PgStat_ShmemControl
 PgStat_Snapshot
-- 
2.39.2

#9Melanie Plageman
melanieplageman@gmail.com
In reply to: Nazir Bilal Yavuz (#8)
Re: Refactor calculations to use instr_time

On Thu, Mar 09, 2023 at 04:02:44PM +0300, Nazir Bilal Yavuz wrote:

From dcd49e48a0784a95b8731df1c6ee7c3a612a8529 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Thu, 9 Mar 2023 15:35:38 +0300
Subject: [PATCH v5] Refactor instr_time calculations

Also, some calculations are refactored to use 'INSTR_TIME_ACCUM_DIFF' instead
of using 'INSTR_TIME_SUBTRACT' and 'INSTR_TIME_ADD'.
---
src/backend/access/transam/xlog.c | 6 ++---
src/backend/storage/file/buffile.c | 6 ++---
src/backend/utils/activity/pgstat_wal.c | 31 +++++++++++++------------
src/include/pgstat.h | 17 +++++++++++++-
src/tools/pgindent/typedefs.list | 1 +
5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..58daae3fbd6 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
* Calculate how much WAL usage counters were increased by subtracting the
* previous counters from the current ones.
*/
-	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);

if (!nowait)
LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
return true;

-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);

I think you want one less L here?
WALLSTAT_ACC_INSTR_TIME_TYPE -> WALSTAT_ACC_INSTR_TIME_TYPE

Also, I don't quite understand why TYPE is at the end of the name. I
think it would still be clear without it.

I might find it clearer if the WALSTAT_ACC_INSTR_TIME_TYPE macro was
defined before using it for those fields instead of defining it right
after defining WALSTAT_ACC.

+	WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
+#undef WALLSTAT_ACC_INSTR_TIME_TYPE
#undef WALSTAT_ACC
LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ede..5bbc55bb341 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -442,6 +442,21 @@ typedef struct PgStat_WalStats
TimestampTz stat_reset_timestamp;
} PgStat_WalStats;
+/*
+ * This struct stores wal-related durations as instr_time, which makes it
+ * easier to accumulate them without requiring type conversions. Then,
+ * during stats flush, they will be moved into shared stats with type
+ * conversions.
+ */
+typedef struct PgStat_PendingWalStats
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+

So, I am not a fan of having this second struct (PgStat_PendingWalStats)
which only has a subset of the members of PgStat_WalStats. It is pretty
confusing.

It is okay to have two structs -- one that is basically "in-memory" and
one that is a format that can be on disk, but these two structs with
different members are confusing and don't convey why we have the two
structs.

I would either put WalUsage into PgStat_PendingWalStats (so that it has
all the same members as PgStat_WalStats), or figure out a way to
maintain WalUsage separately from PgStat_WalStats or something else.
Worst case, add more comments to the struct definitions to explain why
they have the members they have and how WalUsage relates to them.

- Melanie

#10Nazir Bilal Yavuz
byavuz81@gmail.com
In reply to: Melanie Plageman (#9)
1 attachment(s)
Re: Refactor calculations to use instr_time

Hi,

Thanks for the review.

On Fri, 17 Mar 2023 at 02:02, Melanie Plageman
<melanieplageman@gmail.com> wrote:

I think you want one less L here?
WALLSTAT_ACC_INSTR_TIME_TYPE -> WALSTAT_ACC_INSTR_TIME_TYPE

Done.

Also, I don't quite understand why TYPE is at the end of the name. I
think it would still be clear without it.

Done.

I might find it clearer if the WALSTAT_ACC_INSTR_TIME_TYPE macro was
defined before using it for those fields instead of defining it right
after defining WALSTAT_ACC.

Since it is undefined together with WALSTAT_ACC, defining them
together makes sense to me.

+ * This struct stores wal-related durations as instr_time, which makes it
+ * easier to accumulate them without requiring type conversions. Then,
+ * during stats flush, they will be moved into shared stats with type
+ * conversions.
+ */
+typedef struct PgStat_PendingWalStats
+{
+     PgStat_Counter wal_buffers_full;
+     PgStat_Counter wal_write;
+     PgStat_Counter wal_sync;
+     instr_time wal_write_time;
+     instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+

So, I am not a fan of having this second struct (PgStat_PendingWalStats)
which only has a subset of the members of PgStat_WalStats. It is pretty
confusing.

It is okay to have two structs -- one that is basically "in-memory" and
one that is a format that can be on disk, but these two structs with
different members are confusing and don't convey why we have the two
structs.

I would either put WalUsage into PgStat_PendingWalStats (so that it has
all the same members as PgStat_WalStats), or figure out a way to
maintain WalUsage separately from PgStat_WalStats or something else.
Worst case, add more comments to the struct definitions to explain why
they have the members they have and how WalUsage relates to them.

Yes, but like Andres said this could be better done as a separate patch.

v6 is attached.

Regards,
Nazir Bilal Yavuz
Microsoft

Attachments:

v6-0001-Refactor-instr_time-calculations.patchapplication/x-patch; name=v6-0001-Refactor-instr_time-calculations.patchDownload
From 02f1b5426ad9394a673c28a6df53fc56ff6ffa89 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Wed, 22 Mar 2023 15:25:44 +0300
Subject: [PATCH v6] Refactor instr_time calculations

In instr_time.h it is stated that:

* When summing multiple measurements, it's recommended to leave the
* running sum in instr_time form (ie, use INSTR_TIME_ADD or
* INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.

So, this patch aims to refactor 'PendingWalStats' to use 'instr_time' instead
of 'PgStat_Counter' while accumulating 'wal_write_time' and 'wal_sync_time'.

Also, some calculations are refactored to use 'INSTR_TIME_ACCUM_DIFF' instead
of using 'INSTR_TIME_SUBTRACT' and 'INSTR_TIME_ADD'.
---
 src/backend/access/transam/xlog.c       |  6 ++---
 src/backend/storage/file/buffile.c      |  6 ++---
 src/backend/utils/activity/pgstat_wal.c | 31 +++++++++++++------------
 src/include/pgstat.h                    | 17 +++++++++++++-
 src/tools/pgindent/typedefs.list        |  1 +
 5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 543d4d897ae..46821ad6056 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					instr_time	duration;
 
 					INSTR_TIME_SET_CURRENT(duration);
-					INSTR_TIME_SUBTRACT(duration, start);
-					PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
 				}
 
 				PendingWalStats.wal_write++;
@@ -8204,8 +8203,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		instr_time	duration;
 
 		INSTR_TIME_SET_CURRENT(duration);
-		INSTR_TIME_SUBTRACT(duration, start);
-		PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
 	}
 
 	PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3b..e55f86b675e 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+		INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
 	}
 
 	/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
 		if (track_io_timing)
 		{
 			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+			INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
 		}
 
 		file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..94f1a3b06e3 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
 #include "executor/instrument.h"
 
 
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalStats PendingWalStats = {0};
 
 /*
  * WAL usage counters saved from pgWALUsage at the previous call to
@@ -70,7 +70,7 @@ bool
 pgstat_flush_wal(bool nowait)
 {
 	PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
-	WalUsage	diff = {0};
+	WalUsage	wal_usage_diff = {0};
 
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 	Assert(pgStatLocal.shmem != NULL &&
@@ -88,25 +88,26 @@ pgstat_flush_wal(bool nowait)
 	 * Calculate how much WAL usage counters were increased by subtracting the
 	 * previous counters from the current ones.
 	 */
-	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
-	PendingWalStats.wal_records = diff.wal_records;
-	PendingWalStats.wal_fpi = diff.wal_fpi;
-	PendingWalStats.wal_bytes = diff.wal_bytes;
+	WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevWalUsage);
 
 	if (!nowait)
 		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
-	WALSTAT_ACC(wal_records);
-	WALSTAT_ACC(wal_fpi);
-	WALSTAT_ACC(wal_bytes);
-	WALSTAT_ACC(wal_buffers_full);
-	WALSTAT_ACC(wal_write);
-	WALSTAT_ACC(wal_sync);
-	WALSTAT_ACC(wal_write_time);
-	WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+	(stats_shmem->stats.fld += var_to_add.fld)
+#define WALSTAT_ACC_INSTR_TIME(fld) \
+	(stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+	WALSTAT_ACC(wal_records, wal_usage_diff);
+	WALSTAT_ACC(wal_fpi, wal_usage_diff);
+	WALSTAT_ACC(wal_bytes, wal_usage_diff);
+	WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+	WALSTAT_ACC(wal_write, PendingWalStats);
+	WALSTAT_ACC(wal_sync, PendingWalStats);
+	WALSTAT_ACC_INSTR_TIME(wal_write_time);
+	WALSTAT_ACC_INSTR_TIME(wal_sync_time);
+#undef WALSTAT_ACC_INSTR_TIME
 #undef WALSTAT_ACC
 
 	LWLockRelease(&stats_shmem->lock);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 1e418b682b5..e8e17423c47 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -433,6 +433,21 @@ typedef struct PgStat_WalStats
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
+/*
+ * This struct stores wal-related durations as instr_time, which makes it
+ * easier to accumulate them without requiring type conversions. Then,
+ * during stats flush, they will be moved into shared stats with type
+ * conversions.
+ */
+typedef struct PgStat_PendingWalStats
+{
+	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_write;
+	PgStat_Counter wal_sync;
+	instr_time wal_write_time;
+	instr_time wal_sync_time;
+} PgStat_PendingWalStats;
+
 
 /*
  * Functions in pgstat.c
@@ -746,7 +761,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
  */
 
 /* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats;
 
 
 #endif							/* PGSTAT_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 097f42e1b34..879f748a7dc 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2048,6 +2048,7 @@ PgStat_Kind
 PgStat_KindInfo
 PgStat_LocalState
 PgStat_PendingDroppedStatsItem
+PgStat_PendingWalStats
 PgStat_SLRUStats
 PgStat_ShmemControl
 PgStat_Snapshot
-- 
2.40.0

#11Andres Freund
andres@anarazel.de
In reply to: Nazir Bilal Yavuz (#10)
Re: Refactor calculations to use instr_time

Hi,

I pushed this version! Thanks all, for the contribution and reviews.

I would either put WalUsage into PgStat_PendingWalStats (so that it has
all the same members as PgStat_WalStats), or figure out a way to
maintain WalUsage separately from PgStat_WalStats or something else.
Worst case, add more comments to the struct definitions to explain why
they have the members they have and how WalUsage relates to them.

Yes, but like Andres said this could be better done as a separate patch.

I invite you to write a patch for that for 17...

Greetings,

Andres Freund