From 16b40324a5bc5bbe6e06cbfb86251c907f400151 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 28 Mar 2023 20:34:31 +0900
Subject: [PATCH v10] Add protections in xlog record APIs against overflows

Before this, it was possible for an extension to create malicious WAL
records that were too large to replay; or that would overflow the
xl_tot_len field, causing potential corruption in WAL record IO ops.

Emitting invalid records was also possible through
pg_logical_emit_message(), which allowed you to emit arbitrary amounts
of data up to 2GB, much higher than the replay limit of approximately
1GB.

This patch adds a limit to the size of an XLogRecord (1020MiB), checks
against overflows, and ensures that the reader infrastructure can read
any max-sized XLogRecords, such that the wal-generating backend will
fail when it attempts to insert unreadable records, instead of that
insertion succeeding but breaking any replication streams.

Author: Matthias van de Meent <boekewurm+postgres@gmail.com>
---
 src/include/access/xlogrecord.h         | 11 ++++
 src/backend/access/transam/xloginsert.c | 73 +++++++++++++++++++++----
 2 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index 0d576f7883..0a7b0bb6fc 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -54,6 +54,17 @@ typedef struct XLogRecord
 
 #define SizeOfXLogRecord	(offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
 
+/*
+ * XLogReader needs to allocate all the data of an xlog record in a single
+ * chunk.  This means that a single XLogRecord cannot exceed MaxAllocSize
+ * in length if we ignore any allocation overhead of the XLogReader.
+ *
+ * To accommodate some overhead, this value allows for 4M of allocation
+ * overhead, that should be plenty enough for what
+ * DecodeXLogRecordRequiredSpace() expects as extra.
+ */
+#define XLogRecordMaxSize	(1020 * 1024 * 1024)
+
 /*
  * The high 4 bits in xl_info may be used freely by rmgr. The
  * XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY bits can be passed by
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 008612e032..86fa6a5276 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -33,6 +33,7 @@
 #include "access/xloginsert.h"
 #include "catalog/pg_control.h"
 #include "common/pg_lzcompress.h"
+#include "common/int.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
@@ -351,11 +352,13 @@ void
 XLogRegisterData(char *data, uint32 len)
 {
 	XLogRecData *rdata;
+	uint32		result = 0;
 
 	Assert(begininsert_called);
 
 	if (num_rdatas >= max_rdatas)
-		elog(ERROR, "too much WAL data");
+		elog(ERROR, "too much WAL data in main chunk: num_rdatas %d, max_rdatas %d",
+			 num_rdatas, max_rdatas);
 	rdata = &rdatas[num_rdatas++];
 
 	rdata->data = data;
@@ -369,7 +372,11 @@ XLogRegisterData(char *data, uint32 len)
 	mainrdata_last->next = rdata;
 	mainrdata_last = rdata;
 
-	mainrdata_len += len;
+	if (pg_add_u32_overflow(mainrdata_len, len, &result))
+		elog(ERROR, "too much WAL data in main chunk: mainrdata_len %u, len %u",
+			 mainrdata_len, len);
+
+	mainrdata_len = result;
 }
 
 /*
@@ -390,6 +397,7 @@ XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
 {
 	registered_buffer *regbuf;
 	XLogRecData *rdata;
+	uint32 result = 0;
 
 	Assert(begininsert_called);
 
@@ -405,9 +413,9 @@ XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
 	 * regbuf->rdata_len does not grow beyond what
 	 * XLogRecordBlockHeader->data_length can hold.
 	 */
-	if (num_rdatas >= max_rdatas ||
-		regbuf->rdata_len + len > UINT16_MAX)
-		elog(ERROR, "too much WAL data");
+	if (num_rdatas >= max_rdatas)
+		elog(ERROR, "too much WAL data specific to block %u: num_rdatas %u, max_rdatas %u",
+			 block_id, num_rdatas, max_rdatas);
 
 	rdata = &rdatas[num_rdatas++];
 
@@ -416,7 +424,12 @@ XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
 
 	regbuf->rdata_tail->next = rdata;
 	regbuf->rdata_tail = rdata;
-	regbuf->rdata_len += len;
+
+	if (pg_add_u32_overflow(regbuf->rdata_len, len, &result) ||
+		regbuf->rdata_len + len > UINT16_MAX)
+		elog(ERROR, "too much WAL data: rdata_len %u, len %u",
+			 regbuf->rdata_len, len);
+	regbuf->rdata_len = result;
 }
 
 /*
@@ -528,6 +541,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 {
 	XLogRecData *rdt;
 	uint32		total_len = 0;
+	uint32		result = 0;
 	int			block_id;
 	pg_crc32c	rdata_crc;
 	registered_buffer *prev_regbuf = NULL;
@@ -759,7 +773,10 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 				}
 			}
 
-			total_len += bimg.length;
+			if (pg_add_u32_overflow(total_len, (uint32) bimg.length, &result))
+				elog(ERROR, "too much WAL data: block_id %u, block length %u, total_len %u",
+					 block_id, bimg.length, total_len);
+			total_len = result;
 		}
 
 		if (needs_data)
@@ -776,7 +793,11 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 			 */
 			bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
 			bkpb.data_length = (uint16) regbuf->rdata_len;
-			total_len += regbuf->rdata_len;
+
+			if (pg_add_u32_overflow(total_len, regbuf->rdata_len, &result))
+				elog(ERROR, "too much WAL data: total_len %u, rdata_len %u",
+					 total_len, regbuf->rdata_len);
+			total_len = result;
 
 			rdt_datas_last->next = regbuf->rdata_head;
 			rdt_datas_last = regbuf->rdata_tail;
@@ -852,12 +873,19 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 		}
 		rdt_datas_last->next = mainrdata_head;
 		rdt_datas_last = mainrdata_last;
-		total_len += mainrdata_len;
+
+		if (pg_add_u32_overflow(total_len, mainrdata_len, &result))
+			elog(ERROR, "too much WAL data: total_len %u, mainrdata_len %u",
+				 total_len, mainrdata_len);
+		total_len = result;
 	}
 	rdt_datas_last->next = NULL;
 
 	hdr_rdt.len = (scratch - hdr_scratch);
-	total_len += hdr_rdt.len;
+	if (pg_add_u32_overflow(total_len, hdr_rdt.len, &result))
+		elog(ERROR, "too much WAL data: total_len %u, hdr_rdt.len %u",
+			 total_len, hdr_rdt.len);
+	total_len = result;
 
 	/*
 	 * Calculate CRC of the data
@@ -872,6 +900,18 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 	for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
 		COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
 
+	/*
+	 * Ensure that the XLogRecord is not too large.
+	 *
+	 * XLogReader machinery is only able to handle records up to a certain
+	 * size (ignoring machine resource limitations), so make sure we will
+	 * not emit records larger than those sizes we advertise we support.
+	 * This cap is based on DecodeXLogRecordRequiredSpace().
+	 */
+	if (total_len >= XLogRecordMaxSize)
+		elog(ERROR, "too much WAL data: total_len %u, max %u",
+			 total_len, XLogRecordMaxSize);
+
 	/*
 	 * Fill in the fields in the record header. Prev-link is filled in later,
 	 * once we know where in the WAL the record will be inserted. The CRC does
@@ -1297,6 +1337,19 @@ log_newpage_range(Relation rel, ForkNumber forknum,
 void
 InitXLogInsert(void)
 {
+#ifdef USE_ASSERT_CHECKING
+
+	/*
+	 * Check that any records assembled can be decoded.  This is capped
+	 * based on what XLogReader would require at its maximum bound.
+	 * This code path is called once per backend, more than enough for
+	 * this check.
+	 */
+	size_t max_required = DecodeXLogRecordRequiredSpace(XLogRecordMaxSize);
+
+	Assert(AllocSizeIsValid(max_required));
+#endif
+
 	/* Initialize the working areas */
 	if (xloginsert_cxt == NULL)
 	{
-- 
2.40.0

