Make GiST waldump output more descriptive

Started by Kirill Reshke, 3 months ago, 2 messages
#1 Kirill Reshke
reshkekirill@gmail.com
1 attachment(s)

Hi hackers!

pageinspect and pg_waldump are among my favorite PostgreSQL
tools, and I use them often in my work. Recently, while working
with GiST internals, I spotted a few places in the GiST pg_waldump
output that could be made more descriptive.

These changes helped me a lot in understanding how GiST redo works, so
maybe they will be useful to others as well?

Patch changes:

1) For the XLOG_GIST_PAGE_UPDATE record, the patch adds a `delete offsets:`
field, which lists the offsets on the page that are to be deleted by
this WAL record (ntodelete from gistxlogPageUpdate, plus the data of
block 0). I did not add anything about the ntoinsert tuples to insert
from this WAL record, for two reasons. First, the insert page offset is
not included in the WAL record, since the redo routine always inserts at
the end of the index page. Second, I do not see how to pretty-print
tuple data in waldump (without schema knowledge it is basically random
bytes). So I decided not to print anything.

example output:
```
rmgr: Gist len (rec/tot): 106/ 106, tx: 0, lsn:
0/155CD1D8, prev 0/155CD190, desc: PAGE_UPDATE ntodelete 28 ntoinsert
0 delete offsets: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, blkref #0: rel
1663/16384/17020 blk 122

```

2) For the XLOG_GIST_PAGE_SPLIT record, I simply print how many tuples
are to be placed on each page after the GiST split. I also print all
fields of gistxlogPageSplit (previously only `npage` was displayed).

example output:

```
rmgr: Gist len (rec/tot): 10902/ 10902, tx: 1019, lsn:
0/154CBE28, prev 0/154CBDC8, desc: PAGE_SPLIT page_split: splits to 6
pages, origrlink 2, origleaf T, orignsn: 0, markfollowright: T, blk
data 1: adds 5 tuples, blk data 2: adds 4 tuples, blk data 3: adds 3
tuples, blk data 4: adds 2 tuples, blk data 5: adds 1 tuples, blk data
6: adds 1 tuples, blkref #1: rel 1663/16384/17020 blk 45, blkref #2:
rel 1663/16384/17020 blk 108, blkref #3: rel 1663/16384/17020 blk 109,
blkref #4: rel 1663/16384/17020 blk 110, blkref #5: rel
1663/16384/17020 blk 111, blkref #6: rel 1663/16384/17020 blk 112
```

WDYT?

--
Best regards,
Kirill Reshke

Attachments:

v1-0001-Gist-desc-better.patch (application/octet-stream; name=v1-0001-Gist-desc-better.patch) [Download]
From f82a651f0c1e4fb3e35e292db297d365265c5d8a Mon Sep 17 00:00:00 2001
From: reshke <reshke@double.cloud>
Date: Thu, 2 Oct 2025 12:23:44 +0000
Subject: [PATCH v1] Gist desc better

---
 src/backend/access/rmgrdesc/gistdesc.c | 47 ++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index a2b84e898f9..3e9eba01eb9 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -15,11 +15,33 @@
 #include "postgres.h"
 
 #include "access/gistxlog.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 
 static void
-out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
+out_gistxlogPageUpdate(StringInfo buf, XLogReaderState *record, gistxlogPageUpdate *xlrec)
 {
+	appendStringInfo(buf, "ntodelete %u ntoinsert %u",
+				xlrec->ntodelete, xlrec->ntoinsert);
+
+	if (XLogRecHasBlockData(record, 0)) {
+		char* payload = XLogRecGetBlockData(record, 0, NULL);
+		OffsetNumber *todelete = (OffsetNumber *) payload;
+		uint16 i;
+
+		if (xlrec->ntodelete) {
+			appendStringInfo(buf, " delete offsets: ");
+
+			for (i = 0; i < xlrec->ntodelete; ++i) {
+				if (i + 1 != xlrec->ntodelete)
+					appendStringInfo(buf, "%d, ", todelete[i]);
+				else
+					appendStringInfo(buf, "%d", todelete[i]);
+			}
+
+			payload += sizeof(OffsetNumber) * xlrec->ntodelete;
+		}
+	}
 }
 
 static void
@@ -42,10 +64,23 @@ out_gistxlogDelete(StringInfo buf, gistxlogDelete *xlrec)
 }
 
 static void
-out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
+out_gistxlogPageSplit(StringInfo buf, XLogReaderState *record, gistxlogPageSplit *xlrec)
 {
-	appendStringInfo(buf, "page_split: splits to %d pages",
-					 xlrec->npage);
+	int i;
+	appendStringInfo(buf, "page_split: splits to %d pages, origrlink %d, origleaf %c, orignsn: %ld, markfollowright: %c",
+					 xlrec->npage, xlrec->origrlink,
+					 xlrec->origleaf ? 'T' : 'F', xlrec->orignsn,
+					 xlrec->markfollowright ? 'T' : 'F');
+
+	for (i = 1; i <= xlrec->npage; ++ i)
+	{
+		int n;
+
+		/* extract the number of tuples */
+		memcpy(&n, XLogRecGetBlockData(record, i, NULL), sizeof(int));
+		appendStringInfo(buf, ", blk data %d: adds %d tuples",
+					 i, n);
+	}
 }
 
 static void
@@ -66,7 +101,7 @@ gist_desc(StringInfo buf, XLogReaderState *record)
 	switch (info)
 	{
 		case XLOG_GIST_PAGE_UPDATE:
-			out_gistxlogPageUpdate(buf, (gistxlogPageUpdate *) rec);
+			out_gistxlogPageUpdate(buf, record, (gistxlogPageUpdate *) rec);
 			break;
 		case XLOG_GIST_PAGE_REUSE:
 			out_gistxlogPageReuse(buf, (gistxlogPageReuse *) rec);
@@ -75,7 +110,7 @@ gist_desc(StringInfo buf, XLogReaderState *record)
 			out_gistxlogDelete(buf, (gistxlogDelete *) rec);
 			break;
 		case XLOG_GIST_PAGE_SPLIT:
-			out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
+			out_gistxlogPageSplit(buf, record, (gistxlogPageSplit *) rec);
 			break;
 		case XLOG_GIST_PAGE_DELETE:
 			out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
-- 
2.43.0

#2 BharatDB
bharatdbpg@gmail.com
In reply to: Kirill Reshke (#1)
Re: Make GiST waldump output more descriptive

Hi Kirill and pgsql-hackers,

I came across your patch proposal and decided to give v1 a try on
PostgreSQL 19devel. I applied it manually after a small drift issue with git
apply, rebuilt cleanly, and ran some tests with a GiST index on a point
column (15k inserts for splits + 7.5k deletes + updates + VACUUM FULL for
updates).

I tried your patch and got output exactly as your message describes—super
descriptive and helpful for understanding GiST redo! For example:

PAGE_UPDATE (counts print perfectly, offsets skipped when ntodelete=0 as
expected): rmgr: Gist ... desc: PAGE_UPDATE ntodelete 0 ntoinsert 1, blkref
#0: rel 1663/5/16445 blk 0

PAGE_SPLIT (full fields + tuple adds per blk, from one of several splits):
rmgr: Gist ... desc: PAGE_SPLIT page_split: splits to 3 pages, origrlink
-1, origleaf T, orignsn: 0, markfollowright: T, blk data 1: adds 2 tuples,
blk data 2: adds 117 tuples, blk data 3: adds 69 tuples, blkref #1: rel
1663/5/16445 blk 0, blkref #2: ... blk 1, blkref #3: ... blk 2

It makes WAL dumps way easier to follow—no more guessing on deletions or
split balances. No issues here.

I'd be glad to contribute further if I can—maybe testing on
PostGIS for geo GiST, more scenarios, or helping with a v2 rebase? Just say
the word—excited to help!

Best,

Lakshmi G

On Mon, Oct 6, 2025 at 10:52 AM Kirill Reshke <reshkekirill@gmail.com>
wrote:

Show quoted text

Hi hackers!

pageinspect and pg_waldump are among my most favorite postgresql
tools. I do often use them for my work. Recently I was busy working
with GiST internals, and spotted a few places in GiST pg_waldump which
can be enhanced to make output more descriptive.

These changes helped me a lot to understand how GiST redo works, so
maybe they are indeed useful?

Patch changes:

1) For XLOG_GIST_PAGE_UPDATE record, patch adds `delete offsets:`
field which describes which offsets on page ought to be deleted by
this walrecord (ntodelete from
gistxlogPageUpdate + data block 0 data). I did not add anything about
ntoinsert/
tuples to insert from this walrecord because of two reasons. First,
insert page offset is not included in the walrecord, since redo
routine always inserts at the end of index page. Secondly, I do not
see how to pretty-print tuple data in waldump (since without schema
knowledge this is basically random bytes). So I decided not to print
anything.

example output:
```
rmgr: Gist len (rec/tot): 106/ 106, tx: 0, lsn:
0/155CD1D8, prev 0/155CD190, desc: PAGE_UPDATE ntodelete 28 ntoinsert
0 delete offsets: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, blkref #0: rel
1663/16384/17020 blk 122

```

2)
For the XLOG_GIST_PAGE_SPLIT record, I simply printed how many tuples
are to be placed on each page after GiST split. I also printed all
fields of gistxlogPageSplit (it used to display `npages` only).

example output:

```
rmgr: Gist len (rec/tot): 10902/ 10902, tx: 1019, lsn:
0/154CBE28, prev 0/154CBDC8, desc: PAGE_SPLIT page_split: splits to 6
pages, origrlink 2, origleaf T, orignsn: 0, markfollowright: T, blk
data 1: adds 5 tuples, blk data 2: adds 4 tuples, blk data 3: adds 3
tuples, blk data 4: adds 2 tuples, blk data 5: adds 1 tuples, blk data
6: adds 1 tuples, blkref #1: rel 1663/16384/17020 blk 45, blkref #2:
rel 1663/16384/17020 blk 108, blkref #3: rel 1663/16384/17020 blk 109,
blkref #4: rel 1663/16384/17020 blk 110, blkref #5: rel
1663/16384/17020 blk 111, blkref #6: rel 1663/16384/17020 blk 112
```

WHYT?

--
Best regards,
Kirill Reshke