[PATCH] Expose checkpoint timestamp and duration in pg_stat_checkpointer
Hi all,
While debugging checkpointer write behavior, I put together some
enhancements that extend pg_stat_checkpointer and the checkpoint
completion log: the completion log line now shows the checkpoint type
(manual/timed/immediate), and last_checkpoint_time and
checkpoint_total_time are exposed through SQL; the log part is emitted
when `log_checkpoints` is enabled. I am attaching my observations,
screenshots and patches in support of this.
1. Log line showing the type of checkpoint that occurred:
2025-11-20 11:51:06.128 IST [18026] LOG: checkpoint complete
(immediate): wrote 7286 buffers (44.5%), wrote 4 SLRU buffers; 0 WAL
file(s) added, 0 removed, 27 recycled; write=0.095 s, sync=0.034 s,
total=0.279 s; sync files=17, longest=0.004 s, average=0.002 s;
distance=447382 kB, estimate=531349 kB; lsn=0/7F4EDED8, redo
lsn=0/7F4EDE80
2. checkpoint_total_time and last_checkpoint_time from pg_stat_checkpointer, with the corresponding log line:
 checkpoint_total_time |       last_checkpoint_time
-----------------------+----------------------------------
                175138 | 2025-11-20 11:58:02.879149+05:30
(1 row)
2025-11-20 11:58:02.879 IST [18026] LOG: checkpoint complete
(immediate): wrote 0 buffers (0.0%), wrote 0 SLRU buffers; 0 WAL
file(s) added, 0 removed, 0 recycled; write=0.001 s, sync=0.001 s,
total=0.019 s; sync files=0, longest=0.000 s, average=0.000 s;
distance=0 kB, estimate=478214 kB; lsn=0/7F4EDFE0, redo lsn=0/7F4EDF88
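For reference, the values above come from a plain query against the new
view columns added by the patch, along these lines:

SELECT checkpoint_total_time, last_checkpoint_time
FROM pg_stat_checkpointer;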
Looking forward to more feedback.
Regards,
Soumya
Attachments:
0001-Added-enhancement-related-to-checkpoint-reason-durat.patch
From 465ed2178a5437f31497ca5f5bb4a38ed0451b7a Mon Sep 17 00:00:00 2001
From: BharatDB <bharatdbpg@gmail.com>
Date: Mon, 10 Nov 2025 11:15:24 +0530
Subject: [PATCH] Added enhancement related to checkpoint reason / duration
info in logs and stats
Signed-off-by: BharatDB <bharatdbpg@gmail.com>
---
src/backend/access/transam/xlog.c | 11 +++++-
src/backend/catalog/system_views.sql | 4 ++-
.../utils/activity/pgstat_checkpointer.c | 34 +++++++++++++++++++
src/backend/utils/adt/pgstatfuncs.c | 21 ++++++++++++
src/include/catalog/pg_proc.dat | 11 ++++++
src/include/pgstat.h | 5 ++-
6 files changed, 83 insertions(+), 3 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ec992d2139..9217508917 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6761,6 +6761,7 @@ LogCheckpointEnd(bool restartpoint, int flags)
sync_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_sync_t,
CheckpointStats.ckpt_sync_end_t);
+
/* Accumulate checkpoint timing summary data, in milliseconds. */
PendingCheckpointerStats.write_time += write_msecs;
PendingCheckpointerStats.sync_time += sync_msecs;
@@ -6774,7 +6775,15 @@ LogCheckpointEnd(bool restartpoint, int flags)
total_msecs = TimestampDifferenceMilliseconds(CheckpointStats.ckpt_start_t,
CheckpointStats.ckpt_end_t);
-
+
+
+ /* Store in PendingCheckpointerStats */
+ PendingCheckpointerStats.checkpoint_total_time += (double) total_msecs;
+ PendingCheckpointerStats.last_checkpoint_time = CheckpointStats.ckpt_end_t;
+
+ /* Publishing it */
+ pgstat_report_checkpointer();
+
/*
* Timing values returned from CheckpointStats are in microseconds.
* Convert to milliseconds for consistent printing.
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index dec8df4f8e..903e001d95 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1190,7 +1190,9 @@ CREATE VIEW pg_stat_checkpointer AS
pg_stat_get_checkpointer_sync_time() AS sync_time,
pg_stat_get_checkpointer_buffers_written() AS buffers_written,
pg_stat_get_checkpointer_slru_written() AS slru_written,
- pg_stat_get_checkpointer_stat_reset_time() AS stats_reset;
+ pg_stat_get_checkpointer_stat_reset_time() AS stats_reset,
+ pg_stat_get_checkpointer_checkpoint_total_time() AS checkpoint_total_time,
+ pg_stat_get_checkpointer_last_checkpoint_time() AS last_checkpoint_time;
CREATE VIEW pg_stat_io AS
SELECT
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index e65034a30a..62ef427b82 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -56,8 +56,14 @@ pgstat_report_checkpointer(void)
CHECKPOINTER_ACC(sync_time);
CHECKPOINTER_ACC(buffers_written);
CHECKPOINTER_ACC(slru_written);
+ CHECKPOINTER_ACC(checkpoint_total_time);
#undef CHECKPOINTER_ACC
+ /* only overwrite if we actually have a new timestamp */
+ if (PendingCheckpointerStats.last_checkpoint_time != 0)
+ stats_shmem->stats.last_checkpoint_time =
+ PendingCheckpointerStats.last_checkpoint_time;
+
pgstat_end_changecount_write(&stats_shmem->changecount);
/*
@@ -71,6 +77,28 @@ pgstat_report_checkpointer(void)
pgstat_flush_io(false);
}
+/* ------------------------------------------------------------
+ * Extended checkpointer stats reporting function
+ * ------------------------------------------------------------
+ */
+void
+pgstat_report_checkpointer_extended(long total_msecs, TimestampTz end_time)
+{
+
+ PgStat_CheckpointerStats *checkpointer_stats;
+
+
+ checkpointer_stats = pgstat_fetch_stat_checkpointer();
+ if (!checkpointer_stats)
+ return;
+
+
+ checkpointer_stats->checkpoint_total_time += total_msecs;
+ checkpointer_stats->last_checkpoint_time = end_time;
+
+}
+
+
/*
* pgstat_fetch_stat_checkpointer() -
*
@@ -136,5 +164,11 @@ pgstat_checkpointer_snapshot_cb(void)
CHECKPOINTER_COMP(sync_time);
CHECKPOINTER_COMP(buffers_written);
CHECKPOINTER_COMP(slru_written);
+ CHECKPOINTER_COMP(checkpoint_total_time);
#undef CHECKPOINTER_COMP
+
+ pgStatLocal.snapshot.checkpointer.last_checkpoint_time = stats_shmem->stats.last_checkpoint_time;
+
+ elog(LOG, "DBG snapshot_cb: copied last_checkpoint_time=%ld",
+ (long) pgStatLocal.snapshot.checkpointer.last_checkpoint_time);
}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a710508979..591ad2ac88 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -2292,3 +2292,24 @@ pg_stat_have_stats(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(pgstat_have_entry(kind, dboid, objid));
}
+
+PG_FUNCTION_INFO_V1(pg_stat_get_checkpointer_checkpoint_total_time);
+
+Datum
+pg_stat_get_checkpointer_checkpoint_total_time(PG_FUNCTION_ARGS)
+{
+ PgStat_CheckpointerStats *stats = pgstat_fetch_stat_checkpointer();
+ PG_RETURN_FLOAT8(stats->checkpoint_total_time);
+}
+
+PG_FUNCTION_INFO_V1(pg_stat_get_checkpointer_last_checkpoint_time);
+
+Datum
+pg_stat_get_checkpointer_last_checkpoint_time(PG_FUNCTION_ARGS)
+{
+
+ PgStat_CheckpointerStats *stats = pgstat_fetch_stat_checkpointer();
+ if (!stats) PG_RETURN_NULL();
+ PG_RETURN_TIMESTAMPTZ(stats->last_checkpoint_time);
+
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 9121a382f7..a57053c4e2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5980,6 +5980,17 @@
proname => 'pg_stat_get_checkpointer_stat_reset_time', provolatile => 's',
proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_stat_reset_time' },
+# New functions for checkpointer
+{ oid => '7000',
+ descr => 'total time spent in last checkpoint in milliseconds',
+ proname => 'pg_stat_get_checkpointer_checkpoint_total_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_checkpoint_total_time' },
+{ oid => '7001',
+ descr => 'timestamp of last checkpoint completion',
+ proname => 'pg_stat_get_checkpointer_last_checkpoint_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_last_checkpoint_time' },
{ oid => '2772',
descr => 'statistics: number of buffers written by the bgwriter for cleaning dirty buffers',
proname => 'pg_stat_get_bgwriter_buf_written_clean', provolatile => 's',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 7ae503e71a..a8eb1f8add 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -263,7 +263,9 @@ typedef struct PgStat_CheckpointerStats
PgStat_Counter sync_time;
PgStat_Counter buffers_written;
PgStat_Counter slru_written;
- TimestampTz stat_reset_timestamp;
+ PgStat_Counter checkpoint_total_time; /* new: total ms of last checkpoint */
+ TimestampTz last_checkpoint_time; /* new: end time of last checkpoint */
+ TimestampTz stat_reset_timestamp;
} PgStat_CheckpointerStats;
@@ -583,6 +585,7 @@ extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
extern void pgstat_report_checkpointer(void);
extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
+extern void pgstat_report_checkpointer_extended(long total_msecs, TimestampTz end_time);
/*
--
2.34.1
0001-Enhance-checkpoint-logs-to-include-reason-manual-tim.patch
From 28ac031e3484df0d293bfeabd4ec08bed961b6f2 Mon Sep 17 00:00:00 2001
From: BharatDB <bharatdbpg@gmail.com>
Date: Mon, 3 Nov 2025 16:19:09 +0530
Subject: [PATCH] Enhance checkpoint logs to include reason
(manual/timed/immediate)
Signed-off-by: BharatDB <bharatdbpg@gmail.com>
---
src/backend/access/transam/xlog.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index fd91bcd68e..ec992d2139 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -56,6 +56,7 @@
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
+#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
@@ -6732,7 +6733,7 @@ LogCheckpointStart(int flags, bool restartpoint)
* Log end of a checkpoint.
*/
static void
-LogCheckpointEnd(bool restartpoint)
+LogCheckpointEnd(bool restartpoint, int flags)
{
long write_msecs,
sync_msecs,
@@ -6740,6 +6741,17 @@ LogCheckpointEnd(bool restartpoint)
longest_msecs,
average_msecs;
uint64 average_sync_time;
+ const char *ckpt_reason = "timed";
+
+ /* Determine checkpoint reason */
+ if (flags & CHECKPOINT_IS_SHUTDOWN)
+ ckpt_reason = "shutdown";
+ else if (flags & CHECKPOINT_END_OF_RECOVERY)
+ ckpt_reason = "end-of-recovery";
+ else if (flags & CHECKPOINT_FAST)
+ ckpt_reason = "immediate";
+ else if (flags & CHECKPOINT_FORCE)
+ ckpt_reason = "forced";
CheckpointStats.ckpt_end_t = GetCurrentTimestamp();
@@ -6782,12 +6794,13 @@ LogCheckpointEnd(bool restartpoint)
*/
if (restartpoint)
ereport(LOG,
- (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
+ (errmsg("restartpoint complete (%s): wrote %d buffers (%.1f%%), "
"wrote %d SLRU buffers; %d WAL file(s) added, "
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
+ ckpt_reason,
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -6806,12 +6819,13 @@ LogCheckpointEnd(bool restartpoint)
LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
else
ereport(LOG,
- (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
+ (errmsg("checkpoint complete (%s): wrote %d buffers (%.1f%%), "
"wrote %d SLRU buffers; %d WAL file(s) added, "
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
+ ckpt_reason,
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -7400,7 +7414,7 @@ CreateCheckPoint(int flags)
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
/* Real work is done; log and update stats. */
- LogCheckpointEnd(false);
+ LogCheckpointEnd(false, flags);
/* Reset the process title */
update_checkpoint_display(flags, false, true);
@@ -7868,7 +7882,7 @@ CreateRestartPoint(int flags)
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
/* Real work is done; log and update stats. */
- LogCheckpointEnd(true);
+ LogCheckpointEnd(true, flags);
/* Reset the process title */
update_checkpoint_display(flags, true, true);
--
2.34.1
Hi,
On Mon, Nov 24, 2025 at 11:40:44AM +0530, Soumya S Murali wrote:
While debugging checkpointer write behavior, I recently found some of the
enhancements related to extending pg_stat_checkpointer by including
checkpoint type (manual/timed/immediate), last_checkpoint_time and
checkpoint_total_time information to checkpoint completion logs through SQL
when `log_checkpoints` is enabled. I am attaching my observations,
screenshots and patch in support for this.
1. Log for type of checkpoint occurred:
2025-11-20 11:51:06.128 IST [18026] LOG: checkpoint complete
(immediate): wrote 7286 buffers (44.5%), wrote 4 SLRU buffers; 0 WAL
file(s) added, 0 removed, 27 recycled; write=0.095 s, sync=0.034 s,
total=0.279 s; sync files=17, longest=0.004 s, average=0.002 s;
distance=447382 kB, estimate=531349 kB; lsn=0/7F4EDED8, redo
lsn=0/7F4EDE80
I think that'd be useful; the checkpoint complete log line clearly has
the more interesting output, and having it state the type would make it
easier to answer questions like "how many buffers did the last WAL-based
checkpoint write?"
2. Log for the checkpoint_total_time and last_checkpoint_time:
checkpoint_total_time | last_checkpoint_time
-----------------------+----------------------------------
175138 | 2025-11-20 11:58:02.879149+05:30
(1 row)
Reading through the patch, it looks like checkpoint_total_time is the
total time of the last checkpoint?
+ proparallel => 'r', prorettype => 'float8', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_checkpoint_total_time' },
If so, the naming is pretty confusing, last_checkpoint_duration or
something might be clearer.
In general I doubt how much those gauges (as opposed to counters) only
pertaining to the last checkpoint are useful in pg_stat_checkpointer.
What would be the use case for those two values?
Also, as a nitpick, your patch adds unnecessary newlines and I think
stats_reset should be kept as last column in pg_stat_checkpointer as
usual.
Michael
On 2025-Nov-24, Michael Banck wrote:
In general I doubt how much those gauges (as opposed to counters) only
pertaining to the last checkpoint are useful in pg_stat_checkpointer.
What would be the use case for those two values?
I think it's useful to know how long a checkpoint has to work. It's a bit
lame to have only one duration (the last one), but at least with this
arrangement you can have external monitoring software connect to the
server, extract that value and save it somewhere else. Monitoring
systems do this all the time, and we've been waiting for a better
implementation to store monitoring data inside Postgres for years. I
think we shouldn't block this proposal just because of this issue,
because it can clearly be useful.
However, I'm not sure I'm very interested in knowing only the duration
of the checkpoint. I mean, much of the time the duration is going to be
whatever fraction of the checkpoint timeout you have as
checkpoint_completion_target, right? Which includes sleeps. So I think
you really want two durations: one is the duration itself, and the other
is what fraction of that did the checkpointer sleep in order to achieve
that duration. So you know how much time the checkpointer spent trying to
get the operating system to do stuff rather than just sit there waiting.
We already have that data, kinda, in write_time and sync_time, but those
are cumulative rather than just for the last one. (I guess you can have
the monitoring system compute the deltas as it finds each new
checkpoint.) I'm not sure how good this system is.
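Roughly, the delta computation on the monitoring side would look
something like the following, assuming the agent keeps its samples of
the existing cumulative columns in its own table (names here are made
up):

-- hypothetical table the monitoring agent appends one row to per poll
CREATE TABLE ckpt_samples (
    sample_time timestamptz DEFAULT now(),
    write_time  double precision,
    sync_time   double precision
);

INSERT INTO ckpt_samples (write_time, sync_time)
SELECT write_time, sync_time FROM pg_stat_checkpointer;

-- per-interval active checkpoint time, from successive samples
SELECT sample_time,
       (write_time + sync_time)
         - lag(write_time + sync_time) OVER (ORDER BY sample_time) AS active_ms
FROM ckpt_samples
ORDER BY sample_time;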
In the past, I looked at a couple of monitoring dashboards offered by
cloud vendors, searching for anything valuable in terms of checkpoints.
What I saw was very disappointing -- mostly just "how many checkpoints
per minute", which is mostly flat zero with periodic spikes. Totally
useless. Does anybody know if some vendor has good charts for this?
Also, if we were to add this new proposed duration, how could these
charts improve?
--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"How strange it is to find the words "Perl" and "saner" in such close
proximity, with no apparent sense of irony. I doubt that Larry himself
could have managed it." (ncm, http://lwn.net/Articles/174769/)
Hi,
On Mon, Nov 24, 2025 at 11:07:41AM +0100, Álvaro Herrera wrote:
On 2025-Nov-24, Michael Banck wrote:
In general I doubt how much those gauges (as opposed to counters) only
pertaining to the last checkpoint are useful in pg_stat_checkpointer.
What would be the use case for those two values?
I think it's useful to know how long a checkpoint has to work. It's a bit
lame to have only one duration (the last one), but at least with this
arrangement you can have external monitoring software connect to the
server, extract that value and save it somewhere else. Monitoring
systems do this all the time, and we've been waiting for a better
implementation to store monitoring data inside Postgres for years. I
think we shouldn't block this proposal just because of this issue,
because it can clearly be useful.
I don't know - what happens if the monitoring system reads those values
every minute, but then suddenly Postgres checkpoints every 20 seconds
due to a traffic spike? It would just not see those additional
checkpoints in this case, no?
What monitoring systems do (have to do) is query write_time + sync_time
as total_time in pg_stat_checkpointer and store that along with the
timestamp of the query. Then you (maybe awkwardly) generate a graph of
the checkpoint durations over time.
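Concretely, something like this on every poll, with the result stored
externally next to the sample timestamp (write_time and sync_time being
the existing cumulative columns):

SELECT now() AS sample_time,
       write_time + sync_time AS total_time
FROM pg_stat_checkpointer;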
However, I'm not sure I'm very interested in knowing only the duration
of the checkpoint. I mean, much of the time the duration is going to be
whatever fraction of the checkpoint timeout you have as
checkpoint_completion_target, right? Which includes sleeps.
Yeah, that is the other thing I was wondering about, but did not mention
in my mail, good point.
So I think you really want two durations: one is the duration itself,
and the other is what fraction of that did the checkpointer sleep in
order to achieve that duration. So you know how much time
the checkpointer spent trying to get the operating system to do stuff rather
than just sit there waiting. We already have that data, kinda, in
write_time and sync_time, but those are cumulative rather than just
for the last one.
I think that we either have "last timestamp whatever" or "total", but I
think we don't have "last duration" anywhere?
(I guess you can have the monitoring system compute
the deltas as it finds each new checkpoint.) I'm not sure how good
this system is.
Right, this is what I meant above. But from what I see on PG18,
total_time just seems to be write_time + sync_time, do we have the sleep
somewhere?
In the past, I looked at a couple of monitoring dashboards offered by
cloud vendors, searching for anything valuable in terms of checkpoints.
What I saw was very disappointing -- mostly just "how many checkpoints
per minute", which is mostly flat zero with periodic spikes. Totally
useless. Does anybody know if some vendor has good charts for this?
Also, if we were to add this new proposed duration, how could these
charts improve?
I don't have a good answer here.
Michael
On Mon, Nov 24, 2025 at 2:48 PM Michael Banck <mbanck@gmx.net> wrote:
Hi,
On Mon, Nov 24, 2025 at 11:40:44AM +0530, Soumya S Murali wrote:
While debugging checkpointer write behavior, I recently found some of the
enhancements related to extending pg_stat_checkpointer by including
checkpoint type (manual/timed/immediate), last_checkpoint_time and
checkpoint_total_time information to checkpoint completion logs through SQL
when `log_checkpoints` is enabled. I am attaching my observations,
screenshots and patch in support for this.
1. Log for type of checkpoint occurred:
2025-11-20 11:51:06.128 IST [18026] LOG: checkpoint complete
(immediate): wrote 7286 buffers (44.5%), wrote 4 SLRU buffers; 0 WAL
file(s) added, 0 removed, 27 recycled; write=0.095 s, sync=0.034 s,
total=0.279 s; sync files=17, longest=0.004 s, average=0.002 s;
distance=447382 kB, estimate=531349 kB; lsn=0/7F4EDED8, redo
lsn=0/7F4EDE80
I think that'd be useful; the checkpoint complete log line clearly has
the more interesting output, and having it state the type would make it
easier to answer questions like "how many buffers did the last WAL-based
checkpoint write?"
Thank you for the feedback and glad to hear that exposing the
checkpoint type in the completion log seems useful. My main motivation
was exactly this kind of analysis: being able to correlate buffer write
patterns with the type of checkpoint that triggered them.
2. Log for the checkpoint_total_time and last_checkpoint_time:
checkpoint_total_time | last_checkpoint_time
-----------------------+----------------------------------
175138 | 2025-11-20 11:58:02.879149+05:30
(1 row)
Reading through the patch, it looks like checkpoint_total_time is the
total time of the last checkpoint?
+ proparallel => 'r', prorettype => 'float8', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_checkpoint_total_time' },
If so, the naming is pretty confusing, last_checkpoint_duration or
something might be clearer.
In general I doubt how much those gauges (as opposed to counters) only
pertaining to the last checkpoint are useful in pg_stat_checkpointer.
What would be the use case for those two values?
Also, as a nitpick, your patch adds unnecessary newlines and I think
stats_reset should be kept as last column in pg_stat_checkpointer as
usual.
Michael
Yes, the field is intended to represent the duration of the most
recently completed checkpoint, and I agree that renaming it to
last_checkpoint_duration would make the purpose clearer. Even
though it is a single value, it can still help monitoring tools
capture and store each duration over time, so I’ll refine the naming,
remove the unnecessary newlines, and keep stats_reset as the last
column as suggested.
Regards
Soumya
On Mon, Nov 24, 2025 at 3:37 PM Álvaro Herrera <alvherre@kurilemu.de> wrote:
On 2025-Nov-24, Michael Banck wrote:
In general I doubt how much those gauges (as opposed to counters) only
pertaining to the last checkpoint are useful in pg_stat_checkpointer.
What would be the use case for those two values?
I think it's useful to know how long a checkpoint has to work. It's a bit
lame to have only one duration (the last one), but at least with this
arrangement you can have external monitoring software connect to the
server, extract that value and save it somewhere else. Monitoring
systems do this all the time, and we've been waiting for a better
implementation to store monitoring data inside Postgres for years. I
think we shouldn't block this proposal just because of this issue,
because it can clearly be useful.
However, I'm not sure I'm very interested in knowing only the duration
of the checkpoint. I mean, much of the time the duration is going to be
whatever fraction of the checkpoint timeout you have as
checkpoint_completion_target, right? Which includes sleeps. So I think
you really want two durations: one is the duration itself, and the other
is what fraction of that did the checkpointer sleep in order to achieve
that duration. So you know how much time the checkpointer spent trying to
get the operating system to do stuff rather than just sit there waiting.
We already have that data, kinda, in write_time and sync_time, but those
are cumulative rather than just for the last one. (I guess you can have
the monitoring system compute the deltas as it finds each new
checkpoint.) I'm not sure how good this system is.
Thank you for the detailed thoughts. I agree that having only the last
checkpoint’s duration is limited, but it still gives monitoring tools
a concrete value they can sample and store over time, which is better
than relying only on counters and logs. I will look into whether
separating the total duration from the actual active write/sync time
(vs. sleep time) can be exposed more clearly, as that seems useful for
deeper diagnosis.
In the past, I looked at a couple of monitoring dashboards offered by
cloud vendors, searching for anything valuable in terms of checkpoints.
What I saw was very disappointing -- mostly just "how many checkpoints
per minute", which is mostly flat zero with periodic spikes. Totally
useless. Does anybody know if some vendor has good charts for this?
Also, if we were to add this new proposed duration, how could these
charts improve?
I will look into this in more depth. Will let you know if I find
something concrete.
Regards
Soumya
On Wed, Nov 26, 2025 at 11:14, Soumya S Murali <soumyamurali.work@gmail.com>
wrote:
On Mon, Nov 24, 2025 at 3:37 PM Álvaro Herrera <alvherre@kurilemu.de>
wrote:
In the past, I looked at a couple of monitoring dashboards offered by
cloud vendors, searching for anything valuable in terms of checkpoints.
What I saw was very disappointing -- mostly just "how many checkpoints
per minute", which is mostly flat zero with periodic spikes. Totally
useless. Does anybody know if some vendor has good charts for this?
Also, if we were to add this new proposed duration, how could these
charts improve?
I will look into this in more depth. Will let you know if I find
something concrete.
There is a "Checkpoints" section in the pgbadger reports, and that's
probably the most widely used tool.
Regards
Juan José Santamaría Flecha
Hi,
On Wed, Nov 26, 2025 at 06:23:08PM +0100, Juan José Santamaría Flecha wrote:
On Wed, Nov 26, 2025 at 11:14, Soumya S Murali <soumyamurali.work@gmail.com>
wrote:
There is a "Checkpoints" section in the pgbadger reports, and that's
probably the most widely used tool.
That one parses the Postgres logs, so is unaffected by the changes to
pg_stat_checkpointer discussed here.
Michael
Hi all,
On Wed, Nov 26, 2025 at 11:11 PM Michael Banck <mbanck@gmx.net> wrote:
Hi,
On Wed, Nov 26, 2025 at 06:23:08PM +0100, Juan José Santamaría Flecha wrote:
On Wed, Nov 26, 2025 at 11:14, Soumya S Murali <soumyamurali.work@gmail.com>
wrote:
There is a "Checkpoints" section in the pgbadger reports, and that's
probably the most widely used tool.
That one parses the Postgres logs, so is unaffected by the changes to
pg_stat_checkpointer discussed here.
Thank you for the suggestions. I will look at how pgbadger visualizes
checkpoints and also check whether any other monitoring tools
provide meaningful checkpoint charts. If I find anything useful, I’ll
share it.
Regards,
Soumya
Hi all,
I have updated the code based on the feedback received on my earlier
mails and prepared a patch for further review. In this patch, I have
renamed checkpoint_total_time to last_checkpoint_duration, stats_reset
has been kept as the last column following the usual pattern,
last_checkpoint_duration and last_checkpoint_time are now overwritten
per checkpoint, and the unnecessary blank lines have been removed. I
have verified the checkpoint duration with different write loads and am
attaching the observations for further reference.
pgbench -c 8 -j 8 -T 30 -p 55432 postgres
pgbench (19devel)
starting vacuum...end.
transaction type: <builtin: TPC-B (sort of)>
scaling factor: 50
query mode: simple
number of clients: 8
number of threads: 8
maximum number of tries: 1
duration: 30 s
number of transactions actually processed: 55936
number of failed transactions: 0 (0.000%)
latency average = 4.290 ms
initial connection time = 7.107 ms
tps = 1864.846690 (without initial connection time)
pgbench -c 16 -j 8 -T 60 -p 55432 postgres
pgbench (19devel)
starting vacuum...end.
transaction type: <builtin: TPC-B (sort of)>
scaling factor: 50
query mode: simple
number of clients: 16
number of threads: 8
maximum number of tries: 1
duration: 60 s
number of transactions actually processed: 196974
number of failed transactions: 0 (0.000%)
latency average = 4.873 ms
initial connection time = 12.535 ms
tps = 3283.407286 (without initial connection time)
postgres=# SELECT last_checkpoint_duration, last_checkpoint_time,
           write_time, sync_time, buffers_written FROM pg_stat_checkpointer;
 last_checkpoint_duration |       last_checkpoint_time       | write_time | sync_time | buffers_written
--------------------------+----------------------------------+------------+-----------+-----------------
                    23940 | 2025-11-28 10:02:29.298905+05:30 |     104873 |       811 |            3468
(1 row)
CHECKPOINT
sleep 1
postgres=# SELECT last_checkpoint_duration, last_checkpoint_time,
           write_time, sync_time, buffers_written FROM pg_stat_checkpointer;
 last_checkpoint_duration |       last_checkpoint_time       | write_time | sync_time | buffers_written
--------------------------+----------------------------------+------------+-----------+-----------------
                      332 | 2025-11-28 10:03:57.828072+05:30 |     104979 |       857 |           10453
(1 row)
2025-11-28 10:03:57.828 IST [11343] LOG: checkpoint complete
(immediate): wrote 6985 buffers (42.6%), wrote 11 SLRU buffers; 0 WAL
file(s) added, 0 removed, 32 recycled; write=0.106 s, sync=0.046 s,
total=0.332 s; sync files=23, longest=0.004 s, average=0.002 s;
distance=538440 kB, estimate=540445 kB; lsn=0/84DDA138, redo
lsn=0/84DDA0E0
I hope these observations are helpful for further analysis. Thank you
for the earlier reviews and helpful suggestions. Looking forward to
more feedback.
Regards,
Soumya
Attachments:
0001-Improve-checkpoint-logging-and-expose-last-checkpoin.patch
From 5ac32acb618b563f3e8088afe9f026651c820b8b Mon Sep 17 00:00:00 2001
From: BharatDB <bharatdbpg@gmail.com>
Date: Thu, 27 Nov 2025 16:43:00 +0530
Subject: [PATCH] Improve checkpoint logging and expose last checkpoint
duration in pg_stat_checkpointer
Signed-off-by: BharatDB <bharatdbpg@gmail.com>
---
src/backend/access/transam/xlog.c | 2 +-
src/backend/catalog/system_views.sql | 6 ++--
.../utils/activity/pgstat_checkpointer.c | 32 +++----------------
src/backend/utils/adt/pgstatfuncs.c | 8 +++--
src/include/catalog/pg_proc.dat | 4 +--
src/include/pgstat.h | 2 +-
6 files changed, 17 insertions(+), 37 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9217508917..4a45f4f708 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6778,7 +6778,7 @@ LogCheckpointEnd(bool restartpoint, int flags)
/* Store in PendingCheckpointerStats */
- PendingCheckpointerStats.checkpoint_total_time += (double) total_msecs;
+ PendingCheckpointerStats.last_checkpoint_duration = (double) total_msecs;
PendingCheckpointerStats.last_checkpoint_time = CheckpointStats.ckpt_end_t;
/* Publishing it */
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 903e001d95..a90f64494f 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1190,9 +1190,9 @@ CREATE VIEW pg_stat_checkpointer AS
pg_stat_get_checkpointer_sync_time() AS sync_time,
pg_stat_get_checkpointer_buffers_written() AS buffers_written,
pg_stat_get_checkpointer_slru_written() AS slru_written,
- pg_stat_get_checkpointer_stat_reset_time() AS stats_reset,
- pg_stat_get_checkpointer_checkpoint_total_time() AS checkpoint_total_time,
- pg_stat_get_checkpointer_last_checkpoint_time() AS last_checkpoint_time;
+ pg_stat_get_checkpointer_last_checkpoint_duration() AS last_checkpoint_duration,
+ pg_stat_get_checkpointer_last_checkpoint_time() AS last_checkpoint_time,
+ pg_stat_get_checkpointer_stat_reset_time() AS stats_reset;
CREATE VIEW pg_stat_io AS
SELECT
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index 62ef427b82..ec51874af7 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -56,10 +56,13 @@ pgstat_report_checkpointer(void)
CHECKPOINTER_ACC(sync_time);
CHECKPOINTER_ACC(buffers_written);
CHECKPOINTER_ACC(slru_written);
- CHECKPOINTER_ACC(checkpoint_total_time);
#undef CHECKPOINTER_ACC
/* only overwrite if we actually have a new timestamp */
+ if (PendingCheckpointerStats.last_checkpoint_duration > 0)
+ stats_shmem->stats.last_checkpoint_duration =
+ PendingCheckpointerStats.last_checkpoint_duration;
+
if (PendingCheckpointerStats.last_checkpoint_time != 0)
stats_shmem->stats.last_checkpoint_time =
PendingCheckpointerStats.last_checkpoint_time;
@@ -77,28 +80,6 @@ pgstat_report_checkpointer(void)
pgstat_flush_io(false);
}
-/* ------------------------------------------------------------
- * Extended checkpointer stats reporting function
- * ------------------------------------------------------------
- */
-void
-pgstat_report_checkpointer_extended(long total_msecs, TimestampTz end_time)
-{
-
- PgStat_CheckpointerStats *checkpointer_stats;
-
-
- checkpointer_stats = pgstat_fetch_stat_checkpointer();
- if (!checkpointer_stats)
- return;
-
-
- checkpointer_stats->checkpoint_total_time += total_msecs;
- checkpointer_stats->last_checkpoint_time = end_time;
-
-}
-
-
/*
* pgstat_fetch_stat_checkpointer() -
*
@@ -164,11 +145,8 @@ pgstat_checkpointer_snapshot_cb(void)
CHECKPOINTER_COMP(sync_time);
CHECKPOINTER_COMP(buffers_written);
CHECKPOINTER_COMP(slru_written);
- CHECKPOINTER_COMP(checkpoint_total_time);
+ CHECKPOINTER_COMP(last_checkpoint_duration);
#undef CHECKPOINTER_COMP
pgStatLocal.snapshot.checkpointer.last_checkpoint_time = stats_shmem->stats.last_checkpoint_time;
-
- elog(LOG, "DBG snapshot_cb: copied last_checkpoint_time=%ld",
- (long) pgStatLocal.snapshot.checkpointer.last_checkpoint_time);
}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 591ad2ac88..57a1853ab1 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -2293,13 +2293,15 @@ pg_stat_have_stats(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(pgstat_have_entry(kind, dboid, objid));
}
-PG_FUNCTION_INFO_V1(pg_stat_get_checkpointer_checkpoint_total_time);
+PG_FUNCTION_INFO_V1(pg_stat_get_checkpointer_last_checkpoint_duration);
Datum
-pg_stat_get_checkpointer_checkpoint_total_time(PG_FUNCTION_ARGS)
+pg_stat_get_checkpointer_last_checkpoint_duration(PG_FUNCTION_ARGS)
{
PgStat_CheckpointerStats *stats = pgstat_fetch_stat_checkpointer();
- PG_RETURN_FLOAT8(stats->checkpoint_total_time);
+ if (!stats)
+ PG_RETURN_NULL();
+ PG_RETURN_FLOAT8(stats->last_checkpoint_duration);
}
PG_FUNCTION_INFO_V1(pg_stat_get_checkpointer_last_checkpoint_time);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index a57053c4e2..043bf854bc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5983,9 +5983,9 @@
# New functions for checkpointer
{ oid => '7000',
descr => 'total time spent in last checkpoint in milliseconds',
- proname => 'pg_stat_get_checkpointer_checkpoint_total_time', provolatile => 's',
+ proname => 'pg_stat_get_checkpointer_last_checkpoint_duration', provolatile => 's',
proparallel => 'r', prorettype => 'float8', proargtypes => '',
- prosrc => 'pg_stat_get_checkpointer_checkpoint_total_time' },
+ prosrc => 'pg_stat_get_checkpointer_last_checkpoint_duration' },
{ oid => '7001',
descr => 'timestamp of last checkpoint completion',
proname => 'pg_stat_get_checkpointer_last_checkpoint_time', provolatile => 's',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a8eb1f8add..73688041c8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -263,7 +263,7 @@ typedef struct PgStat_CheckpointerStats
PgStat_Counter sync_time;
PgStat_Counter buffers_written;
PgStat_Counter slru_written;
- PgStat_Counter checkpoint_total_time; /* new: total ms of last checkpoint */
+ PgStat_Counter last_checkpoint_duration; /* new: total ms of last checkpoint */
TimestampTz last_checkpoint_time; /* new: end time of last checkpoint */
TimestampTz stat_reset_timestamp;
} PgStat_CheckpointerStats;
--
2.34.1
Hi,
On Fri, Nov 28, 2025 at 10:23:54AM +0530, Soumya S Murali wrote:
I have updated the code based on the feedback received to my earlier
mails and prepared a patch for further review.
I think the logging change and the pg_stat_checkpointer changes are
different enough that they should be separate patches, if only
because the logging change seems to not have had any non-positive
feedback.
In this patch, I have renamed the checkpoint_total_time to
last_checkpoint_duration, stats_reset has been kept as the last column
following the usual pattern, last_checkpoint_duration and
last_checkpoint_time will now be overwritten per checkpoint and also
have removed unnecessary lines as per the usual format. I had
successfully verified the checkpointer duration with different write
loads and I am attaching the observations for further reference.
I am still not convinced of the usefulness of those changes to
pg_stat_checkpointer, but some feedback on the patch:
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9217508917..4a45f4f708 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6778,7 +6778,7 @@ LogCheckpointEnd(bool restartpoint, int flags)
/* Store in PendingCheckpointerStats */
- PendingCheckpointerStats.checkpoint_total_time += (double) total_msecs;
+ PendingCheckpointerStats.last_checkpoint_duration = (double) total_msecs;
PendingCheckpointerStats.last_checkpoint_time = CheckpointStats.ckpt_end_t;
[...]
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a8eb1f8add..73688041c8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -263,7 +263,7 @@ typedef struct PgStat_CheckpointerStats
PgStat_Counter sync_time;
PgStat_Counter buffers_written;
PgStat_Counter slru_written;
- PgStat_Counter checkpoint_total_time; /* new: total ms of last checkpoint */
+ PgStat_Counter last_checkpoint_duration; /* new: total ms of last checkpoint */
TimestampTz last_checkpoint_time; /* new: end time of last checkpoint */
TimestampTz stat_reset_timestamp;
} PgStat_CheckpointerStats;
This looks like an incremental patch based on your original one? It is
customary to send the full, updated, patch again.
Michael
Hi all,
On Fri, Nov 28, 2025 at 10:23:54AM +0530, Soumya S Murali wrote:
I have updated the code based on the feedback received to my earlier
mails and prepared a patch for further review.
I think the logging change and the pg_stat_checkpointer changes are
different enough that they should be separate patches. If not just
because the logging change seems to not have had any non-positive
feedback.
Thank you for the review and for the clarification. I understand the
point about separating the logging change and the pg_stat_checkpointer
additions. As per the suggestion, I will make sure to split them into
two independent patches before sending the updated one.
In this patch, I have renamed the checkpoint_total_time to
last_checkpoint_duration, stats_reset has been kept as the last column
following the usual pattern, last_checkpoint_duration and
last_checkpoint_time will now be overwritten per checkpoint and also
have removed unnecessary lines as per the usual format. I had
successfully verified the checkpointer duration with different write
loads and I am attaching the observations for further reference.
I am still not convinced of the usefulness of those changes to
pg_stat_checkpointer, but some feedback on the patch:
As I understand it, monitoring systems can already poll
pg_stat_checkpointer at a reasonable frequency, but with the checkpoint
duration exposed I think it becomes easier to compute checkpoint deltas,
track fluctuations in duration, and notice anomalies and timing
instabilities in WAL-driven checkpoints. These may seem like simple
signals, but they are ones that many existing monitoring dashboards lack
today.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9217508917..4a45f4f708 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6778,7 +6778,7 @@ LogCheckpointEnd(bool restartpoint, int flags)
/* Store in PendingCheckpointerStats */
- PendingCheckpointerStats.checkpoint_total_time += (double) total_msecs;
+ PendingCheckpointerStats.last_checkpoint_duration = (double) total_msecs;
PendingCheckpointerStats.last_checkpoint_time = CheckpointStats.ckpt_end_t;
[...]
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a8eb1f8add..73688041c8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -263,7 +263,7 @@ typedef struct PgStat_CheckpointerStats
PgStat_Counter sync_time;
PgStat_Counter buffers_written;
PgStat_Counter slru_written;
- PgStat_Counter checkpoint_total_time; /* new: total ms of last checkpoint */
+ PgStat_Counter last_checkpoint_duration; /* new: total ms of last checkpoint */
TimestampTz last_checkpoint_time; /* new: end time of last checkpoint */
TimestampTz stat_reset_timestamp;
} PgStat_CheckpointerStats;
This looks like an incremental patch based on your original one? It is
customary to send the full, updated, patch again.
Michael
Ok, noted. I will resend a full updated patch set soon and will make
sure every revision follows the intended process.
Thank you for the guidance. Looking forward to more feedback.
Regards,
Soumya
Hi,
On Mon, Dec 01, 2025 at 11:05:19AM +0530, Soumya S Murali wrote:
On Fri, Nov 28, 2025 at 10:23:54AM +0530, Soumya S Murali wrote:
I am still not convinced of the usefulness of those changes to
pg_stat_checkpointer, but some feedback on the patch:
As I understand it, monitoring systems can already poll
pg_stat_checkpointer at a reasonable frequency, but with the checkpoint
duration exposed I think it becomes easier to compute checkpoint deltas,
track fluctuations in duration, and notice anomalies and timing
instabilities in WAL-driven checkpoints. These may seem like simple
signals, but they are ones that many existing monitoring dashboards lack
today.
What would such a computation look like? Maybe if you give an example, it
would be easier to understand how this would make things better/more
robust.
I mentioned up-thread that one problem would be multiple checkpoints
having happened between two monitoring runs, where the monitoring system
sees the duration of the last checkpoint, but maybe more than one
happened. Should they keep track of the number of overall checkpoints
and adjust in that case?
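For illustration, that adjustment would mean sampling the existing
checkpoint counters next to the proposed gauges, something like

SELECT num_timed + num_requested AS checkpoints_so_far,
       last_checkpoint_duration,
       last_checkpoint_time
FROM pg_stat_checkpointer;

and comparing checkpoints_so_far against the previous sample to see how
many checkpoints the single last_checkpoint_duration value actually
stands for.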
To be more general: we don't store the last duration anywhere else (as
far as I can see, happy to be proven wrong), why is this essential for
checkpoint duration, and not other things? Or to put it another way: why
does the patch change it for checkpoints but not all the other places?
Michael
Hi all,
Thank you for the review and kind feedback.
On Mon, Dec 1, 2025 at 1:45 PM Michael Banck <mbanck@gmx.net> wrote:
Hi,
On Mon, Dec 01, 2025 at 11:05:19AM +0530, Soumya S Murali wrote:
On Fri, Nov 28, 2025 at 10:23:54AM +0530, Soumya S Murali wrote:
I am still not convinced of the usefulness of those changes to
pg_stat_checkpointer, but some feedback on the patch:
As I understand it, monitoring systems can already poll
pg_stat_checkpointer at a reasonable frequency, but with the checkpoint
duration exposed I think it becomes easier to compute checkpoint deltas,
track fluctuations in duration, and notice anomalies and timing
instabilities in WAL-driven checkpoints. These may seem like simple
signals, but they are ones that many existing monitoring dashboards lack
today.
What would such a computation look like? Maybe if you give an example, it
would be easier to understand how this would make things better/more
robust.
Consider a monitoring agent that polls pg_stat_checkpointer every 30
seconds. It reads the total write_time, the total sync_time, the
counters, and (with my proposal) the last checkpoint duration and
timestamp. Even if multiple checkpoints happen between two samples,
having the last duration and last timestamp allows the monitoring system
to spot sudden slow checkpoints. For example, if
last_checkpoint_duration suddenly jumps from roughly 300 ms to 5000 ms,
the monitoring system can alert immediately, even if multiple
checkpoints happened in between; this is hard to detect purely from the
cumulative write_time/sync_time without doing complex delta
calculations. Similarly, if the timestamp shows checkpoints happening
much closer together than expected, the tool can flag it as "unusually
high checkpoint frequency", which can indicate an aggressively
WAL-producing workload, checkpoint_completion_target not being met, or
a saturated I/O layer. This type of detection becomes easier when the
last checkpoint's end time is visible directly.
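A rough sketch of such a check, assuming the proposed
last_checkpoint_duration (in ms) and last_checkpoint_time columns and a
hypothetical table in which the agent keeps its previous sample:

-- hypothetical table maintained by the monitoring agent
CREATE TABLE ckpt_last_sample (
    last_checkpoint_duration double precision,
    last_checkpoint_time     timestamptz
);

-- flag a sudden duration spike or unusually frequent checkpoints
SELECT c.last_checkpoint_duration,
       c.last_checkpoint_time,
       c.last_checkpoint_duration > 10 * p.last_checkpoint_duration
           AS duration_spike,
       (c.last_checkpoint_time - p.last_checkpoint_time) < interval '1 minute'
           AS unusually_frequent
FROM pg_stat_checkpointer AS c
CROSS JOIN ckpt_last_sample AS p;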
I mentioned up-thread that one problem would be multiple checkpoints
having happened between two monitoring runs, where the monitoring system
sees the duration of the last checkpoint, but maybe more than one
happened. Should they keep track of the number of overall checkpoints
and adjust in that case?
To be more general: we don't store the last duration anywhere else (as
far as I can see, happy to be proven wrong), why is this essential for
checkpoint duration, and not other things? Or to put it another way: why
does the patch change it for checkpoints but not all the other places?
Michael
You are right that the last duration is not stored anywhere else so far,
and it is true that most pg_stat views expose only cumulative counters.
The reason this patch focuses specifically on checkpoints is that
checkpoint timing is one of the few places where a single reading of one
event can directly indicate instability. A single unusually long
checkpoint often points to conditions such as backend stalls, WAL flush
bottlenecks, extended buffer recycling, bgwriter slowdowns or
checkpointer I/O instability. So storing the last checkpoint duration is
indeed a small extension, but it offers a direct signal that many
monitoring dashboards currently lack.
I hope this explanation makes the motivation for the patch clearer.
Looking forward to more feedback.
Regards,
Soumya