From 85365c33aa0bd9703a5376fb94d1af17ea5ac3b6 Mon Sep 17 00:00:00 2001
From: Evgeny Voropaev <evorop@gmail.com>
Date: Fri, 6 Mar 2026 17:35:40 +0800
Subject: [PATCH v01 3/3] Use Delta Frame of Reference (DFoR) to compress
 prune/freeze records.

A prune/freeze record contains four sequences of integers representing
frozen, redirected, unused, and dead tuples. Using DFoR algorithms, the
`unused` and `dead` sequences are now compressed. The `frozen`
and `redirected` sequences cannot be compressed because the order of
their elements is significant, and DFoR does not support unsorted
sequences yet. The theoretical compression ratio for dfor_u16 can reach
up to 16.

The new GUC wal_prune_dfor_compression controls (enables or
disables) compression for prune/freeze records.

An integral TAP test, 052_prune_dfor_compression.pl, has been
implemented. It demonstrates an average compression ratio of at least 5
when analyzing prune/freeze records in practice.

Author: Evgeny Voropaev <evgeny.voropaev@tantorlabs.com> <evorop@gmail.com>
---
 src/backend/access/heap/heapam_xlog.c         |  12 +-
 src/backend/access/heap/pruneheap.c           | 134 +++++++++++++--
 src/backend/access/rmgrdesc/Makefile          |   1 +
 .../access/rmgrdesc/heapam_xlog_dfor.c        | 109 ++++++++++++
 src/backend/access/rmgrdesc/heapdesc.c        |  49 ++++--
 src/backend/utils/misc/guc_parameters.dat     |   7 +
 src/backend/utils/misc/guc_tables.c           |   1 +
 src/bin/pg_waldump/.gitignore                 |   9 +
 src/bin/pg_waldump/Makefile                   |  26 ++-
 src/include/access/heapam_xlog.h              |   8 +-
 src/include/access/heapam_xlog_dfor.h         | 137 ++++++++++++++++
 src/test/dfor/Makefile                        |   2 +
 .../recovery/t/052_prune_dfor_compression.pl  | 155 ++++++++++++++++++
 13 files changed, 616 insertions(+), 34 deletions(-)
 create mode 100644 src/backend/access/rmgrdesc/heapam_xlog_dfor.c
 create mode 100644 src/include/access/heapam_xlog_dfor.h
 create mode 100644 src/test/recovery/t/052_prune_dfor_compression.pl

diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 6d39a5fff7c..bbe9d471f4e 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -16,6 +16,7 @@
 
 #include "access/bufmask.h"
 #include "access/heapam.h"
+#include "access/heapam_xlog_dfor.h"
 #include "access/visibilitymap.h"
 #include "access/xlog.h"
 #include "access/xlogutils.h"
@@ -105,11 +106,20 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
 		bool		do_prune;
 
+		/*
+		 * DFoR unpacking needs outer buffers for saving results and for
+		 * allocating containers used during decompression. 2 buffer parts are
+		 * intended for saving sequences of offsets of dead and unused tuples.
+		 * Additional three chunks are needed for internal needs of the
+		 * dfor_unpack function.
+		 */
+		uint8 dfor_buf[5 * DFOR_BUF_PART_SIZE];
+
 		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
 											   &nplans, &plans, &frz_offsets,
 											   &nredirected, &redirected,
 											   &ndead, &nowdead,
-											   &nunused, &nowunused);
+											   &nunused, &nowunused, dfor_buf);
 
 		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
 
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 65c9f393f41..7e6cd793afb 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -16,6 +16,7 @@
 
 #include "access/heapam.h"
 #include "access/heapam_xlog.h"
+#include "access/heapam_xlog_dfor.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/transam.h"
@@ -202,7 +203,6 @@ static void page_verify_redirects(Page page);
 static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
 								  PruneState *prstate);
 
-
 /*
  * Optionally prune and repair fragmentation in the specified page.
  *
@@ -2147,6 +2147,17 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
 	return nplans;
 }
 
+/*
+ * Comparator for offsets.
+ */
+static int
+heap_log_offset_cmp(const void *arg1, const void *arg2)
+{
+	const OffsetNumber *offset1 = arg1;
+	const OffsetNumber *offset2 = arg2;
+	return (*offset1 > *offset2) - (*offset1 < *offset2);
+}
+
 /*
  * Write an XLOG_HEAP2_PRUNE* WAL record
  *
@@ -2201,11 +2212,34 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	bool		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
 	bool		do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
 
+	dfor_meta_t dead_meta = { 0 };
+	dfor_meta_t unused_meta = { 0 };
+
+	uint8 dead_meta_pack[MAX_PACKED_META_SIZE];
+	uint8 unused_meta_pack[MAX_PACKED_META_SIZE];
+
+	/*
+	 * Since this code is run in a critical section we can't use dynamic
+	 * allocation during DFoR packing, but we can use buffers allocated in the
+	 * stack. We need at maximum:
+	 * 1) 2 * DFOR_BUF_PART_SIZE
+	 *        - for 2 packed sequences: dead, unused
+	 * 2) 3 * DFOR_BUF_PART_SIZE
+	 * 		  - for internal needs of the dfor_pack function.
+	 *
+	 * Overall, 5 * DFOR_BUF_PART_SIZE
+	 */
+	uint8 dfor_buf[5 * DFOR_BUF_PART_SIZE];
+
 	Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
 
 	xlrec.flags = 0;
 	regbuf_flags_heap = REGBUF_STANDARD;
 
+	/* Heuristically estimated threshold for turning on DFoR compression */
+	if (wal_prune_dfor_compression && (ndead > 9 || nunused > 9))
+		xlrec.flags |= XLHP_DFOR_COMPRESSED;
+
 	/*
 	 * We can avoid an FPI of the heap page if the only modification we are
 	 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
@@ -2230,6 +2264,10 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 	if (do_set_vm)
 		XLogRegisterBuffer(1, vmbuffer, 0);
 
+	/*
+	 * xlhp_freeze_plans is array of structures and is not a sequence
+	 * of integers, that is why we cannot use DFoR compression here.
+	 */
 	if (nfrozen > 0)
 	{
 		int			nplans;
@@ -2258,26 +2296,92 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 		XLogRegisterBufData(0, redirected,
 							sizeof(OffsetNumber[2]) * nredirected);
 	}
-	if (ndead > 0)
+	if ((xlrec.flags & XLHP_DFOR_COMPRESSED) != 0)
 	{
-		xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
+		int dead_pack_res = 0;
+		int unused_pack_res = 0;
 
-		dead_items.ntargets = ndead;
-		XLogRegisterBufData(0, &dead_items,
-							offsetof(xlhp_prune_items, data));
-		XLogRegisterBufData(0, dead,
-							sizeof(OffsetNumber) * ndead);
+		/*
+		 * Dead tuple offsets are subject to be packed with DFoR.
+		 * After that we have:
+		 * 		dead_meta.pack = dfor_buf + DFOR_BUF_PART_SIZE;
+		 */
+		if (ndead > 0)
+		{
+			qsort(dead, ndead, sizeof(OffsetNumber), heap_log_offset_cmp);
+			dead_pack_res = dfor_u16_pack(ndead, dead, DFOR_EXC_USE, &dead_meta,
+										  4 * DFOR_BUF_PART_SIZE, dfor_buf);
+		}
+
+		/*
+		 * Unused tuple offsets are subject to be packed with DFoR.
+		 * After that we have:
+		 * 		unused_meta.pack = dfor_buf + 2 * DFOR_BUF_PART_SIZE;
+		 */
+		if (nunused > 0)
+		{
+			qsort(unused, nunused, sizeof(OffsetNumber), heap_log_offset_cmp);
+			unused_pack_res = dfor_u16_pack(nunused, unused, DFOR_EXC_USE,
+											&unused_meta,
+											4 * DFOR_BUF_PART_SIZE,
+											dfor_buf + DFOR_BUF_PART_SIZE);
+		}
+
+		if (dead_pack_res == 0 && unused_pack_res == 0)
+		{
+			/* All stages of packing have succeeded. We can save DFoR packets
+			 * into log */
+			size_t meta_pack_sz;
+			if (ndead > 0)
+			{
+				xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
+
+				meta_pack_sz = log_heap_prune_and_freeze_pack_meta(
+					&dead_meta, dead_meta_pack);
+
+				XLogRegisterBufData(0, &dead_meta_pack, meta_pack_sz);
+				XLogRegisterBufData(0, dead_meta.pack, dead_meta.nbytes);
+			}
+			if (nunused > 0)
+			{
+				xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
+
+				meta_pack_sz = log_heap_prune_and_freeze_pack_meta(
+					&unused_meta, unused_meta_pack);
+
+				XLogRegisterBufData(0, &unused_meta_pack, meta_pack_sz);
+				XLogRegisterBufData(0, unused_meta.pack, unused_meta.nbytes);
+			}
+		}
+		else
+		{
+			/* Otherwise, we can't use DFoR compression */
+			xlrec.flags &= ~XLHP_DFOR_COMPRESSED;
+		}
 	}
-	if (nunused > 0)
+
+	if ((xlrec.flags & XLHP_DFOR_COMPRESSED) == 0)
 	{
-		xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
+		if (ndead > 0)
+		{
+			xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
 
-		unused_items.ntargets = nunused;
-		XLogRegisterBufData(0, &unused_items,
-							offsetof(xlhp_prune_items, data));
-		XLogRegisterBufData(0, unused,
-							sizeof(OffsetNumber) * nunused);
+			dead_items.ntargets = ndead;
+			XLogRegisterBufData(0, &dead_items,
+								offsetof(xlhp_prune_items, data));
+			XLogRegisterBufData(0, dead, sizeof(OffsetNumber) * ndead);
+		}
+		if (nunused > 0)
+		{
+			xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
+
+			unused_items.ntargets = nunused;
+			XLogRegisterBufData(0, &unused_items,
+								offsetof(xlhp_prune_items, data));
+			XLogRegisterBufData(0, unused, sizeof(OffsetNumber) * nunused);
+		}
 	}
+
 	if (nfrozen > 0)
 		XLogRegisterBufData(0, frz_offsets,
 							sizeof(OffsetNumber) * nfrozen);
diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile
index cd95eec37f1..49e9c46145f 100644
--- a/src/backend/access/rmgrdesc/Makefile
+++ b/src/backend/access/rmgrdesc/Makefile
@@ -17,6 +17,7 @@ OBJS = \
 	gindesc.o \
 	gistdesc.o \
 	hashdesc.o \
+	heapam_xlog_dfor.o \
 	heapdesc.o \
 	logicalmsgdesc.o \
 	mxactdesc.o \
diff --git a/src/backend/access/rmgrdesc/heapam_xlog_dfor.c b/src/backend/access/rmgrdesc/heapam_xlog_dfor.c
new file mode 100644
index 00000000000..47fa000e367
--- /dev/null
+++ b/src/backend/access/rmgrdesc/heapam_xlog_dfor.c
@@ -0,0 +1,109 @@
+#include "lib/bitpack_u16.h"
+#include "access/heapam_xlog_dfor.h"
+
+bool wal_prune_dfor_compression = true;
+
+size_t
+log_heap_prune_and_freeze_pack_meta(const dfor_meta_t *meta, uint8 buf[])
+{
+	size_t caret = 0;
+	caret = bitpack_u16_pack(buf, caret, meta->item_cnt,
+							 XLHPF_META_ITEM_COUNT_SZ);
+	caret = bitpack_u16_pack(buf, caret, meta->delta_wid,
+							 XLHPF_META_DELTA_WIDTH_SZ);
+	caret = bitpack_u16_pack(buf, caret, (meta->exc_cnt == 0) ? 0 : 1,
+							 XLHPF_META_EXCEPTION_FLAG_SZ);
+	if (meta->exc_cnt != 0)
+	{
+		caret = bitpack_u16_pack(buf, caret, meta->exc_cnt,
+								 XLHPF_META_EXCEPTION_COUNT_SZ);
+		caret = bitpack_u16_pack(buf, caret, meta->exc_wid,
+								 XLHPF_META_EXCEPTION_WIDTH_SZ);
+		caret = bitpack_u16_pack(buf, caret, meta->exc_pos_wid,
+								 XLHPF_META_EXCEPTION_POSITION_WIDTH_SZ);
+	}
+	Assert(meta->item_cnt != 0);
+	Assert(meta->delta_wid != 0);
+	Assert(meta->nbytes != 0);
+
+#ifdef USE_ASSERT_CHECKING
+	{
+		dfor_meta_t checker;
+		log_heap_prune_and_freeze_unpack_meta(&checker, buf);
+		Assert(meta->item_cnt == checker.item_cnt);
+		Assert(meta->delta_wid == checker.delta_wid);
+		Assert(meta->exc_cnt == checker.exc_cnt);
+		Assert(meta->exc_wid == checker.exc_wid);
+		Assert(meta->exc_pos_wid == checker.exc_pos_wid);
+	}
+#endif
+	return (caret + 7) / 8; /* the length of packed dfor_meta, in bytes*/
+}
+
+size_t
+log_heap_prune_and_freeze_unpack_meta(dfor_meta_t *meta,
+									  const uint8 packed_meta[])
+{
+	size_t caret = 0;
+	bool exc;
+
+	meta->item_cnt = bitpack_u16_unpack(packed_meta, &caret,
+										XLHPF_META_ITEM_COUNT_SZ);
+	meta->delta_wid = bitpack_u16_unpack(packed_meta, &caret,
+										 XLHPF_META_DELTA_WIDTH_SZ);
+	exc = bitpack_u16_unpack(packed_meta, &caret, XLHPF_META_EXCEPTION_FLAG_SZ);
+
+	if (exc)
+	{
+		meta->exc_cnt = bitpack_u16_unpack(packed_meta, &caret,
+										 XLHPF_META_EXCEPTION_COUNT_SZ);
+		meta->exc_wid = bitpack_u16_unpack(packed_meta, &caret,
+										 XLHPF_META_EXCEPTION_WIDTH_SZ);
+		meta->exc_pos_wid =
+			bitpack_u16_unpack(packed_meta, &caret,
+							   XLHPF_META_EXCEPTION_POSITION_WIDTH_SZ);
+	}
+	else
+	{
+		meta->exc_cnt = 0;
+		meta->exc_wid = 0;
+		meta->exc_pos_wid = 0;
+	}
+	meta->nbytes = dfor_u16_calc_nbytes(*meta);
+
+	Assert(meta->item_cnt != 0);
+	Assert(meta->delta_wid != 0);
+	Assert(meta->nbytes != 0);
+
+	return (caret + 7) / 8;
+}
+
+void
+heap_xlog_deserialize_dfor(char **cursor, int *nitems,
+						   OffsetNumber **items,
+						   uint8 dfor_buf[])
+{
+	dfor_meta_t dfor = {0};
+	size_t packed_meta_nbytes;
+	uniqsortvect_u16_t vect;
+
+	packed_meta_nbytes =
+		log_heap_prune_and_freeze_unpack_meta(&dfor, (uint8*) *cursor);
+
+	*cursor += packed_meta_nbytes;
+
+	dfor.pack = (uint8 *)*cursor;
+	dfor_u16_unpack(&dfor, &vect, 4 * DFOR_BUF_PART_SIZE,
+					dfor_buf);
+
+	*cursor += dfor.nbytes;
+
+	Assert(dfor.nbytes != 0);
+
+	Assert(vect.cnt != 0);
+	Assert(vect.mem_is_outer == true);
+	Assert((void*)vect.m == (void*)dfor_buf);
+
+	*nitems = vect.cnt;
+	*items = vect.m;
+}
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 02ae91653c1..e8e7569da9c 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/heapam_xlog.h"
+#include "access/heapam_xlog_dfor.h"
 #include "access/rmgrdesc_utils.h"
 #include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
@@ -108,7 +109,8 @@ heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 									   OffsetNumber **frz_offsets,
 									   int *nredirected, OffsetNumber **redirected,
 									   int *ndead, OffsetNumber **nowdead,
-									   int *nunused, OffsetNumber **nowunused)
+									   int *nunused, OffsetNumber **nowunused,
+									   uint8 dfor_buf[])
 {
 	if (flags & XLHP_HAS_FREEZE_PLANS)
 	{
@@ -146,14 +148,22 @@ heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 
 	if (flags & XLHP_HAS_DEAD_ITEMS)
 	{
-		xlhp_prune_items *subrecord = (xlhp_prune_items *) cursor;
+		if(!(flags & XLHP_DFOR_COMPRESSED))
+		{
+			xlhp_prune_items *subrecord = (xlhp_prune_items *) cursor;
 
-		*ndead = subrecord->ntargets;
-		Assert(*ndead > 0);
-		*nowdead = subrecord->data;
+			*ndead = subrecord->ntargets;
+			Assert(*ndead > 0);
+			*nowdead = subrecord->data;
 
-		cursor += offsetof(xlhp_prune_items, data);
-		cursor += sizeof(OffsetNumber) * *ndead;
+			cursor += offsetof(xlhp_prune_items, data);
+			cursor += sizeof(OffsetNumber) * *ndead;
+		}
+		else
+		{
+			heap_xlog_deserialize_dfor(&cursor, ndead, nowdead,
+									   dfor_buf);
+		}
 	}
 	else
 	{
@@ -163,14 +173,22 @@ heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 
 	if (flags & XLHP_HAS_NOW_UNUSED_ITEMS)
 	{
-		xlhp_prune_items *subrecord = (xlhp_prune_items *) cursor;
+		if(!(flags & XLHP_DFOR_COMPRESSED))
+		{
+			xlhp_prune_items *subrecord = (xlhp_prune_items *) cursor;
 
-		*nunused = subrecord->ntargets;
-		Assert(*nunused > 0);
-		*nowunused = subrecord->data;
+			*nunused = subrecord->ntargets;
+			Assert(*nunused > 0);
+			*nowunused = subrecord->data;
 
-		cursor += offsetof(xlhp_prune_items, data);
-		cursor += sizeof(OffsetNumber) * *nunused;
+			cursor += offsetof(xlhp_prune_items, data);
+			cursor += sizeof(OffsetNumber) * *nunused;
+		}
+		else
+		{
+			heap_xlog_deserialize_dfor(&cursor, nunused, nowunused,
+									   dfor_buf + DFOR_BUF_PART_SIZE);
+		}
 	}
 	else
 	{
@@ -309,13 +327,16 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 			xlhp_freeze_plan *plans;
 			OffsetNumber *frz_offsets;
 
+			uint8 dfor_buf[5 * DFOR_BUF_PART_SIZE];
+
 			char	   *cursor = XLogRecGetBlockData(record, 0, &datalen);
 
 			heap_xlog_deserialize_prune_and_freeze(cursor, xlrec->flags,
 												   &nplans, &plans, &frz_offsets,
 												   &nredirected, &redirected,
 												   &ndead, &nowdead,
-												   &nunused, &nowunused);
+												   &nunused, &nowunused,
+												   dfor_buf);
 
 			appendStringInfo(buf, ", nplans: %u, nredirected: %u, ndead: %u, nunused: %u",
 							 nplans, nredirected, ndead, nunused);
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 5ee84a639d8..dfd71f2924e 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -3398,6 +3398,13 @@
   boot_val => 'false',
 },
 
+{ name => 'wal_prune_dfor_compression', type => 'bool', context => 'PGC_SUSET', group => 'WAL_SETTINGS',
+  short_desc => 'Compress dead and unused offset arrays at PRUNE/FREEZE WAL records using DFOR.',
+  long_desc => 'Enables compression of dead and unused OffsetNumber arrays stored in heap PRUNE/FREEZE WAL records using customised delta frame-of-reference encoding.',
+  variable => 'wal_prune_dfor_compression',
+  boot_val => 'true'
+},
+
 { name => 'wal_receiver_create_temp_slot', type => 'bool', context => 'PGC_SIGHUP', group => 'REPLICATION_STANDBY',
   short_desc => 'Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured.',
   variable => 'wal_receiver_create_temp_slot',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 38aaf82f120..15fe27e11ef 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -31,6 +31,7 @@
 
 #include "access/commit_ts.h"
 #include "access/gin.h"
+#include "access/heapam_xlog_dfor.h"
 #include "access/slru.h"
 #include "access/toast_compression.h"
 #include "access/twophase.h"
diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore
index ec51f41c767..a3c02446b9d 100644
--- a/src/bin/pg_waldump/.gitignore
+++ b/src/bin/pg_waldump/.gitignore
@@ -10,6 +10,7 @@
 /gistdesc.c
 /hashdesc.c
 /heapdesc.c
+/heapam_xlog_dfor.c
 /logicalmsgdesc.c
 /mxactdesc.c
 /nbtdesc.c
@@ -28,5 +29,13 @@
 /xlogreader.c
 /xlogstats.c
 
+# Source files copied from src/backend/lib
+/bitpack_templ.c
+/bitpack_u16.c
+/dfor_templ.c
+/dfor_u16.c
+/vect_templ.c
+/vect_u16.c
+
 # Generated by test suite
 /tmp_check/
diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile
index 4c1ee649501..f6f84c989b8 100644
--- a/src/bin/pg_waldump/Makefile
+++ b/src/bin/pg_waldump/Makefile
@@ -5,8 +5,9 @@ PGAPPICON=win32
 
 subdir = src/bin/pg_waldump
 top_builddir = ../../..
-include $(top_builddir)/src/Makefile.global
+dfor_dir := $(top_builddir)/src/backend/lib
 
+include $(top_builddir)/src/Makefile.global
 OBJS = \
 	$(RMGRDESCOBJS) \
 	$(WIN32RES) \
@@ -16,9 +17,12 @@ OBJS = \
 	xlogreader.o \
 	xlogstats.o
 
+include $(dfor_dir)/Makefile.dfor
+OBJS += $(OBJS_DFOR)
+
 override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
 
-RMGRDESCSOURCES = $(sort $(notdir $(wildcard $(top_srcdir)/src/backend/access/rmgrdesc/*desc*.c)))
+RMGRDESCSOURCES = $(sort $(notdir $(wildcard $(top_srcdir)/src/backend/access/rmgrdesc/*desc*.c))) heapam_xlog_dfor.c
 RMGRDESCOBJS = $(patsubst %.c,%.o,$(RMGRDESCSOURCES))
 
 
@@ -27,6 +31,24 @@ all: pg_waldump
 pg_waldump: $(OBJS) | submake-libpgport
 	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
 
+bitpack_templ.c: % : $(top_srcdir)/src/backend/lib/%
+	rm -f $@ && $(LN_S) $< .
+
+bitpack_u16.c: % : $(top_srcdir)/src/backend/lib/% bitpack_templ.c
+	rm -f $@ && $(LN_S) $< .
+
+dfor_templ.c: % : $(top_srcdir)/src/backend/lib/%
+	rm -f $@ && $(LN_S) $< .
+
+dfor_u16.c: % : $(top_srcdir)/src/backend/lib/% dfor_templ.c
+	rm -f $@ && $(LN_S) $< .
+
+vect_templ.c: % : $(top_srcdir)/src/backend/lib/%
+	rm -f $@ && $(LN_S) $< .
+
+vect_u16.c: % : $(top_srcdir)/src/backend/lib/% vect_templ.c
+	rm -f $@ && $(LN_S) $< .
+
 xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
 	rm -f $@ && $(LN_S) $< .
 
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index ce3566ba949..dd1d06860ba 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -15,6 +15,7 @@
 #define HEAPAM_XLOG_H
 
 #include "access/htup.h"
+#include "access/htup_details.h"
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/buf.h"
@@ -339,6 +340,8 @@ typedef struct xl_heap_prune
 #define		XLHP_VM_ALL_VISIBLE			(1 << 8)
 #define		XLHP_VM_ALL_FROZEN			(1 << 9)
 
+#define		XLHP_DFOR_COMPRESSED		(1 << 10)
+
 /*
  * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples
  * (appears in xl_heap_prune's xlhp_freeze_plans sub-record)
@@ -511,6 +514,7 @@ extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 												   OffsetNumber **frz_offsets,
 												   int *nredirected, OffsetNumber **redirected,
 												   int *ndead, OffsetNumber **nowdead,
-												   int *nunused, OffsetNumber **nowunused);
+												   int *nunused, OffsetNumber **nowunused,
+												   uint8 dfor_buf[]);
 
-#endif							/* HEAPAM_XLOG_H */
+#endif							/* HEAPAM_XLOG_H */
\ No newline at end of file
diff --git a/src/include/access/heapam_xlog_dfor.h b/src/include/access/heapam_xlog_dfor.h
new file mode 100644
index 00000000000..274b14e891e
--- /dev/null
+++ b/src/include/access/heapam_xlog_dfor.h
@@ -0,0 +1,137 @@
+#ifndef HEAPAM_XLOG_DFOR_H
+#define HEAPAM_XLOG_DFOR_H
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "lib/dfor_u16.h"
+#include "storage/bufpage.h"
+
+/*
+ * DFoR's meta block for PRUNE/FREEZE record
+ *
+ * A meta block contains parameters required for decompression of the following
+ * DFoR pack. It is densely bit-packed. If the exception flag is zero, fields
+ * pertaining to exceptions is absent, which means that DFoR pack does not
+ * contain exceptions. Calculation of field widths takes into account
+ * next considerations:
+ *
+ * Max Item Count should be more or equal to MaxHeapTuplesPerPage. Since we
+ can't calculate
+ * MaxHeapTuplesPerPage on preprocessor stage, we intentionally overestimate it
+ * as:
+ *       Max Item Count > BLCKSZ / Min Tuple Size = BLCKSZ / 24
+ * to provide a margin. In general, depending on BLCKSZ, it should not result in
+ * DFoR meta block overhead.
+ * For instance, for a block size of 32768, we have Max Item Count = 1366, and
+ * it needs 11 bits width field.
+ *
+ * Size of field Item Count:
+ *       ITEM_COUNT_SZ = log2(MaxItemCount).
+ *
+ * Maximum Delta Width is equal to ITEM_COUNT_SZ. So DELTA_WIDTH_SZ in a DFoR
+ * meta block can be calculated as:
+ *      DELTA_WIDTH_SZ >= log2(Max Delta Width) = log2(ITEM_COUNT_SZ)
+ *
+ * Max Exception Count = 0.1 * MaxItemCount, according to DFoR algorithm which
+ * guarantees that not less than 90% of items will be covered without using
+ * exceptions. So:
+ *      EXCEPTION_COUNT_SZ >= log2(0.1 * MaxItemCount).
+ *
+ * An exception is part of a delta, exceeding choosen delta width. Exception is
+ * saved in separated part of DFoR pack and, since delta width is not less
+ * than 1:
+ *    EXCEPTION_WIDTH_SZ >= log2(Max Delta Width - 1) = log2(ITEM_COUNT_SZ - 1).
+ *
+ * An exception's position shows the position of of a delta to wich the
+ * exception has to be applied. Values of an exception position must cover the
+ * same value range as an Item Count, so the Max Width of an Exception Position
+ * is equal to width of Delta. Consequently, the size of Exception Position
+ * Width calculated as:
+ *     EXCEPTION_POSITION_WIDTH_SIZE = log2(Max Delta Width) = DELTA_WIDTH_SZ
+ *
+ * For example, Meta for BLCKSZ equal to 32768 has next sizes of field
+ * | sect. | byte | bits      |   param           |  size  | range of values |
+ * |-------|------|-----------|-------------------|--------|-----------------|
+ * |  main | 0, 1 | 0-10      | item count        | 11 bit | 1...1365        |
+ * |  main |    1 | 11-14     | delta width       |  4 bit | 1...11          |
+ * |  main |    1 | 15        | extra sect. flag  |  1 bit | 0...1           |
+ * | extra |    2 | 16-23     | exception count   |  8 bit | 0...137         |
+ * | extra |    3 | 24-27     | exception width   |  4 bit | 0...10          |
+ * | extra | 3, 4 | 28-35     | except pos. width |  4 bit | 1...11          |
+ */
+
+/*
+ * The sizes of fields in the compressed DFoR Meta structure of an
+ * XLOG_HEAP2_PRUNE* record.
+ */
+#if BLCKSZ == 32768
+#define XLHPF_META_ITEM_COUNT_SZ  11
+#define XLHPF_META_DELTA_WIDTH_SZ 4
+#elif BLCKSZ == 16384
+#define XLHPF_META_ITEM_COUNT_SZ  10
+#define XLHPF_META_DELTA_WIDTH_SZ 4
+#elif BLCKSZ == 8192
+#define XLHPF_META_ITEM_COUNT_SZ  9
+#define XLHPF_META_DELTA_WIDTH_SZ 4
+#elif BLCKSZ == 4096
+#define XLHPF_META_ITEM_COUNT_SZ  8
+#define XLHPF_META_DELTA_WIDTH_SZ 4
+#elif BLCKSZ == 2048
+#define XLHPF_META_ITEM_COUNT_SZ  7
+#define XLHPF_META_DELTA_WIDTH_SZ 3
+#elif BLCKSZ == 1024
+#define XLHPF_META_ITEM_COUNT_SZ  6
+#define XLHPF_META_DELTA_WIDTH_SZ 3
+#elif BLCKSZ == 512
+#define XLHPF_META_ITEM_COUNT_SZ  5
+#define XLHPF_META_DELTA_WIDTH_SZ 3
+#elif BLCKSZ == 256
+#define XLHPF_META_ITEM_COUNT_SZ  4
+#define XLHPF_META_DELTA_WIDTH_SZ 3
+#elif BLCKSZ == 128
+#define XLHPF_META_ITEM_COUNT_SZ  3
+#define XLHPF_META_DELTA_WIDTH_SZ 2
+#elif BLCKSZ == 64
+#define XLHPF_META_ITEM_COUNT_SZ  2
+#define XLHPF_META_DELTA_WIDTH_SZ 2
+#else
+#error "Unsupported BLCKSZ in XLog Heap And Prune."
+#endif
+
+#define XLHPF_META_EXCEPTION_FLAG_SZ 1 /* Flag about Extra Section presence */
+
+/* Size of Exception Count field */
+#if XLHPF_META_ITEM_COUNT_SZ > 6
+#define XLHPF_META_EXCEPTION_COUNT_SZ XLHPF_META_ITEM_COUNT_SZ - 3
+#else
+#define XLHPF_META_EXCEPTION_COUNT_SZ XLHPF_META_ITEM_COUNT_SZ 3
+#endif
+
+#define XLHPF_META_EXCEPTION_WIDTH_SZ \
+	XLHPF_META_DELTA_WIDTH_SZ /* Size of Exception Width field */
+
+#define XLHPF_META_EXCEPTION_POSITION_WIDTH_SZ \
+	XLHPF_META_DELTA_WIDTH_SZ /* Size of Exception Position width field */
+
+/* Maximal size of packed meta */
+#define MAX_PACKED_META_SIZE \
+	(XLHPF_META_ITEM_COUNT_SZ + XLHPF_META_DELTA_WIDTH_SZ +                  \
+	 XLHPF_META_EXCEPTION_FLAG_SZ + XLHPF_META_EXCEPTION_POSITION_WIDTH_SZ + \
+	 XLHPF_META_EXCEPTION_WIDTH_SZ + XLHPF_META_EXCEPTION_COUNT_SZ + 7) / 8
+
+/* The size of a typical chunk of memory used by dfor_pack */
+#define DFOR_BUF_PART_SIZE MaxHeapTuplesPerPage * sizeof(uint16)
+
+extern bool wal_prune_dfor_compression; /* GUC */
+
+extern size_t log_heap_prune_and_freeze_pack_meta(const dfor_meta_t *meta,
+												  uint8 buf[]);
+
+extern size_t log_heap_prune_and_freeze_unpack_meta(dfor_meta_t *meta,
+													const uint8 packed_meta[]);
+
+extern void heap_xlog_deserialize_dfor(char **cursor, int *nitems,
+									   OffsetNumber **items, uint8 dfor_buf[]);
+
+#endif							/* HEAPAM_XLOG_DFOR_H */
\ No newline at end of file
diff --git a/src/test/dfor/Makefile b/src/test/dfor/Makefile
index f7204baa207..5e30b52d585 100644
--- a/src/test/dfor/Makefile
+++ b/src/test/dfor/Makefile
@@ -42,6 +42,8 @@ check-unit: $(TESTS)
 	cd $(top_builddir)/$(subdir) && \
 	   $(PROVE) $(PROVE_FLAGS) \
 	    -v $(addprefix ./,$(if $(PROVE_TESTS), $(PROVE_TESTS), $(TESTS)))
+# The example of using the check-unit rule:
+#		make check-unit PROVE_TESTS='test_dfor_u16' PROVE_FLAGS='--verbose'
 
 check: check-unit
 
diff --git a/src/test/recovery/t/052_prune_dfor_compression.pl b/src/test/recovery/t/052_prune_dfor_compression.pl
new file mode 100644
index 00000000000..819984aa568
--- /dev/null
+++ b/src/test/recovery/t/052_prune_dfor_compression.pl
@@ -0,0 +1,155 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# ------------------------------------------------------------
+# Workload generating dead tuples and PRUNE WAL
+# ------------------------------------------------------------
+sub generate_prune_workload
+{
+    my ($node) = @_;
+
+    $node->safe_psql('postgres', q{
+        CREATE TABLE t_prune (
+            id int,
+            val text
+        ) WITH (fillfactor = 100);
+    });
+
+    $node->safe_psql('postgres', q{
+        SET vacuum_freeze_min_age = 0;
+        SET vacuum_freeze_table_age = 0;
+    });
+
+    # Insert many tuples
+    $node->safe_psql('postgres', q{
+        INSERT INTO t_prune
+        SELECT g, 'x'
+        FROM generate_series(1,300000) g;
+    });
+
+    # Delete 80% of records
+    $node->safe_psql('postgres', q{
+        DELETE FROM t_prune
+		WHERE id % 550 <= 540;
+    });
+
+    # VACUUM cycles to trigger PRUNE
+    for my $i (1..3)
+    {
+        $node->safe_psql('postgres', q{ VACUUM FREEZE t_prune; });
+    }
+}
+
+# ------------------------------------------------------------
+# WAL analyzer
+# ------------------------------------------------------------
+sub collect_prune_stats
+{
+    my ($node) = @_;
+
+	my $wal_dir = $node->data_dir . "/pg_wal";
+
+	print "wal_dir=" . $wal_dir ."\n";
+
+    # Find the first WAL segment
+    my @wal_files = sort glob("$wal_dir/[0-9A-F]*");
+    my $start_seg = $wal_files[0];
+
+    die "No WAL files found" unless defined $start_seg;
+
+	# Run pg_waldump on all segments
+	my $cmd = "pg_waldump --rmgr=Heap2 -p $wal_dir $start_seg 2>/dev/null";
+
+    my @lines = `$cmd`;
+
+print "lines=" . @lines . "\n";
+
+    my $records = 0;
+    my $bytes   = 0;
+
+    foreach my $line (@lines)
+    {
+        next unless $line =~ /PRUNE_VACUUM_SCAN/;
+        $records++;
+        if ($line =~ /len \(rec\/tot\):\s*\d+\/\s*(\d+)/)
+        {
+            $bytes += $1;
+        }
+    }
+	print "records=" . $records . "; bytes=" . $bytes ."\n";
+    return ($records, $bytes);
+}
+
+
+# ------------------------------------------------------------
+# Run test on a fresh cluster
+# ------------------------------------------------------------
+sub run_cluster_test
+{
+    my ($name, $compression) = @_;
+
+    my $node = PostgreSQL::Test::Cluster->new($name);
+
+    $node->init;
+
+    $node->append_conf('postgresql.conf', qq{
+		wal_level = replica
+		autovacuum = off
+		wal_prune_dfor_compression = $compression
+	});
+
+    $node->start;
+
+    generate_prune_workload($node);
+
+    $node->stop;
+
+    return collect_prune_stats($node);
+}
+
+# ------------------------------------------------------------
+# Cluster 1: compression OFF
+# ------------------------------------------------------------
+my ($off_count, $off_bytes) = run_cluster_test(
+    "prune_dfor_off",
+    "off"
+);
+
+note("Compression OFF: $off_count records, $off_bytes bytes");
+
+# ------------------------------------------------------------
+# Cluster 2: compression ON
+# ------------------------------------------------------------
+my ($on_count, $on_bytes) = run_cluster_test(
+    "prune_dfor_on",
+    "on"
+);
+
+note("Compression ON: $on_count records, $on_bytes bytes");
+
+# ------------------------------------------------------------
+# Compression ratio
+# ------------------------------------------------------------
+my $ratio = "N/A";
+
+if ($on_bytes > 0)
+{
+    $ratio = sprintf("%.2f", $off_bytes / $on_bytes);
+}
+
+note("Compression ratio (uncompressed/compressed): $ratio");
+note("Numerator   (uncompressed bytes): $off_bytes");
+note("Denominator (compressed bytes):   $on_bytes");
+
+# ------------------------------------------------------------
+# Expect compression benefit
+# ------------------------------------------------------------
+cmp_ok(
+    $ratio, '>=', 5,
+    'DFOR compression should reduce the size of WAL by at least 5 times.'
+);
+
+done_testing();
\ No newline at end of file
-- 
2.53.0

