From 90351be6c08cc6ad5145b243b763b380fa4585af Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 1 Mar 2023 13:24:19 -0800
Subject: [PATCH v4 09/15] bufmgr: Move relation extension handling into
 ExtendBufferedRel{By,To,}

---
 src/include/pgstat.h                   |   1 +
 src/include/storage/buf_internals.h    |   7 +
 src/include/storage/bufmgr.h           |  49 ++
 src/backend/storage/buffer/bufmgr.c    | 765 +++++++++++++++++++------
 src/backend/storage/buffer/localbuf.c  | 156 ++++-
 src/backend/utils/activity/pgstat_io.c |   8 +-
 src/backend/utils/probes.d             |   6 +-
 doc/src/sgml/monitoring.sgml           |  43 +-
 8 files changed, 853 insertions(+), 182 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ede..d608280b990 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -507,6 +507,7 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
 extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops,
 										 BackendType bktype);
 extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
+extern void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt);
 extern PgStat_IO *pgstat_fetch_stat_io(void);
 extern const char *pgstat_get_io_context_name(IOContext io_context);
 extern const char *pgstat_get_io_object_name(IOObject io_object);
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index fa5c451b1a9..feca19f5620 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -422,6 +422,13 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
 												BlockNumber blockNum);
 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 									BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
+extern BlockNumber ExtendBufferedRelLocal(ExtendBufferedWhat eb,
+										  ForkNumber fork,
+										  uint32 flags,
+										  uint32 extend_by,
+										  BlockNumber extend_upto,
+										  Buffer *buffers,
+										  uint32 *extended_by);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
 									 ForkNumber forkNum,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index c568336b2b3..4665c82baf2 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -138,6 +138,55 @@ extern void BufferCheckOneLocalPin(Buffer buffer);
 extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
 								   BlockNumber blockNum);
 
+
+/*
+ * Flags influencing the behaviour of ExtendBufferedRel*.
+ */
+typedef enum ExtendBufferedFlags {
+	EB_SKIP_EXTENSION_LOCK = (1 << 0),	/* don't take the relation extension lock */
+	EB_IN_RECOVERY = (1 << 1),	/* forwarded to smgrcreate() when creating a fork */
+	EB_CREATE_FORK_IF_NEEDED = (1 << 2),	/* ExtendBufferedRelTo() creates a missing fork */
+	EB_LOCK_FIRST = (1 << 3),	/* content-lock the first returned buffer */
+	EB_CLEAR_SIZE_CACHE = (1 << 4),	/* reset cached fork size before smgrnblocks() */
+
+	/* internal flags follow */
+	EB_LOCK_TARGET = (1 << 5),	/* content-lock the buffer of block extend_upto - 1 */
+} ExtendBufferedFlags;
+
+/*
+ * To identify the relation - either relation or smgr + relpersistence has to
+ * be specified. Used via the EB_REL()/EB_SMGR() macros below. This allows us
+ * to use the same function for both crash recovery and normal operation.
+ */
+typedef struct ExtendBufferedWhat
+{
+	Relation rel;	/* set via EB_REL(), otherwise NULL */
+	struct SMgrRelationData *smgr;	/* set via EB_SMGR(), else derived from rel */
+	char relpersistence;	/* set via EB_SMGR(), else derived from rel */
+} ExtendBufferedWhat;
+
+#define EB_REL(p_rel) ((ExtendBufferedWhat){.rel = p_rel})
+#define EB_SMGR(p_smgr, p_relpersistence) ((ExtendBufferedWhat){.smgr = p_smgr, .relpersistence = p_relpersistence})
+
+extern Buffer ExtendBufferedRel(ExtendBufferedWhat eb,
+								ForkNumber forkNum,
+								BufferAccessStrategy strategy,
+								uint32 flags);
+extern BlockNumber ExtendBufferedRelBy(ExtendBufferedWhat eb,
+									   ForkNumber fork,
+									   BufferAccessStrategy strategy,
+									   uint32 flags,
+									   uint32 extend_by,
+									   Buffer *buffers,
+									   uint32 *extended_by);
+extern Buffer ExtendBufferedRelTo(ExtendBufferedWhat eb,
+								  ForkNumber fork,
+								  BufferAccessStrategy strategy,
+								  uint32 flags,
+								  BlockNumber extend_to,
+								  ReadBufferMode mode);
+
+
 extern void InitBufferPoolAccess(void);
 extern void AtEOXact_Buffers(bool isCommit);
 extern void PrintBufferLeakWarning(Buffer buffer);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index c45ad37ec00..f79e244b74a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -48,6 +48,7 @@
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
+#include "storage/lmgr.h"
 #include "storage/proc.h"
 #include "storage/smgr.h"
 #include "storage/standby.h"
@@ -450,6 +451,22 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence,
 								ForkNumber forkNum, BlockNumber blockNum,
 								ReadBufferMode mode, BufferAccessStrategy strategy,
 								bool *hit);
+static BlockNumber ExtendBufferedRelInternal(ExtendBufferedWhat eb,
+											 ForkNumber fork,
+											 BufferAccessStrategy strategy,
+											 uint32 flags,
+											 uint32 extend_by,
+											 BlockNumber extend_upto,
+											 Buffer *buffers,
+											 uint32 *extended_by);
+static BlockNumber ExtendBufferedRelShared(ExtendBufferedWhat eb,
+										   ForkNumber fork,
+										   BufferAccessStrategy strategy,
+										   uint32 flags,
+										   uint32 extend_by,
+										   BlockNumber extend_upto,
+										   Buffer *buffers,
+										   uint32 *extended_by);
 static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(BufferDesc *buf);
 static void UnpinBuffer(BufferDesc *buf);
@@ -785,6 +802,166 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
 							 mode, strategy, &hit);
 }
 
+/*
+ * Convenience wrapper around ExtendBufferedRelBy() extending by one block.
+ */
+Buffer
+ExtendBufferedRel(ExtendBufferedWhat eb,
+				  ForkNumber forkNum,
+				  BufferAccessStrategy strategy,
+				  uint32 flags)
+{
+	Buffer		buf;			/* receives the single new buffer */
+	uint32		extend_by = 1;	/* in: request; out: blocks actually added */
+
+	ExtendBufferedRelBy(eb, forkNum, strategy, flags, extend_by,
+						&buf, &extend_by);
+
+	return buf;
+}
+
+/*
+ * Extend relation by multiple blocks.
+ *
+ * Tries to extend the relation by extend_by blocks. Depending on the
+ * availability of resources the relation may end up being extended by a
+ * smaller number of pages. *extended_by is set to the number of pages the
+ * relation has actually been extended by.
+ *
+ * buffers needs to be an array that is at least extend_by long. Upon
+ * completion, the first *extended_by array elements will point to a pinned
+ * buffer. Returns the block number of the first newly added block.
+ */
+BlockNumber
+ExtendBufferedRelBy(ExtendBufferedWhat eb,
+					ForkNumber fork,
+					BufferAccessStrategy strategy,
+					uint32 flags,
+					uint32 extend_by,
+					Buffer *buffers,
+					uint32 *extended_by)
+{
+	Assert((eb.rel != NULL) ^ (eb.smgr != NULL));
+	Assert(eb.smgr == NULL || eb.relpersistence != 0);
+	Assert(extend_by > 0);
+
+	if (eb.smgr == NULL)
+	{
+		eb.smgr = RelationGetSmgr(eb.rel);
+		eb.relpersistence = eb.rel->rd_rel->relpersistence;
+	}
+
+	return ExtendBufferedRelInternal(eb, fork, strategy, flags,
+									 extend_by, InvalidBlockNumber,
+									 buffers, extended_by);
+}
+
+/*
+ * Extend to at least extend_to blocks; returns buffer of block extend_to - 1.
+ */
+Buffer
+ExtendBufferedRelTo(ExtendBufferedWhat eb,
+					ForkNumber fork,
+					BufferAccessStrategy strategy,
+					uint32 flags,
+					BlockNumber extend_to,
+					ReadBufferMode mode)
+{
+	BlockNumber current_size;
+	uint32 extended_by = 0;
+	Buffer buffer = InvalidBuffer;
+	Buffer buffers[64];
+
+	Assert((eb.rel != NULL) ^ (eb.smgr != NULL));
+	Assert(eb.smgr == NULL || eb.relpersistence != 0);
+	Assert(extend_to != InvalidBlockNumber && extend_to > 0);
+	Assert(mode == RBM_NORMAL || mode == RBM_ZERO_ON_ERROR ||
+		   mode == RBM_ZERO_AND_LOCK);
+
+	if (eb.smgr == NULL)
+	{
+		eb.smgr = RelationGetSmgr(eb.rel);
+		eb.relpersistence = eb.rel->rd_rel->relpersistence;
+	}
+
+	/*
+	 * Create the file first if it doesn't exist.  If smgr_cached_nblocks[fork]
+	 * is positive then it must exist, no need for an smgrexists call.
+	 */
+	if ((flags & EB_CREATE_FORK_IF_NEEDED) &&
+		(eb.smgr->smgr_cached_nblocks[fork] == 0 ||
+		 eb.smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber) &&
+		!smgrexists(eb.smgr, fork))
+	{
+		if (!(flags & EB_SKIP_EXTENSION_LOCK))
+			LockRelationForExtension(eb.rel, ExclusiveLock);
+
+		/* could have been closed while waiting for lock */
+		if (eb.rel != NULL)
+			eb.smgr = RelationGetSmgr(eb.rel);
+
+		/* recheck, fork might have been created concurrently */
+		if (!smgrexists(eb.smgr, fork))
+			smgrcreate(eb.smgr, fork, flags & EB_IN_RECOVERY);
+
+		if (!(flags & EB_SKIP_EXTENSION_LOCK))
+			UnlockRelationForExtension(eb.rel, ExclusiveLock);
+	}
+
+	/* If requested, invalidate size cache, so smgrnblocks asks the kernel */
+	if (flags & EB_CLEAR_SIZE_CACHE)
+		eb.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
+
+	/*
+	 * Estimate how many pages we'll need to extend by. This avoids acquiring
+	 * unnecessarily many victim buffers.
+	 */
+	current_size = smgrnblocks(eb.smgr, fork);
+
+	if (mode == RBM_ZERO_AND_LOCK)
+		flags |= EB_LOCK_TARGET;
+
+	while (current_size < extend_to)
+	{
+		uint32 num_pages = lengthof(buffers);
+		BlockNumber first_block;
+
+		if ((uint64) current_size + num_pages > extend_to)
+			num_pages = extend_to - current_size;
+
+		first_block = ExtendBufferedRelInternal(eb, fork, strategy, flags,
+												num_pages, extend_to,
+												buffers, &extended_by);
+
+		current_size = first_block + extended_by;
+		Assert(current_size <= extend_to);
+		Assert(num_pages != 0 || current_size >= extend_to);
+
+		for (uint32 i = 0; i < extended_by; i++)
+		{
+			if (first_block + i + 1 != extend_to)
+				ReleaseBuffer(buffers[i]);
+			else
+				buffer = buffers[i];
+		}
+	}
+
+	/*
+	 * It's possible that another backend concurrently extended the relation.
+	 * In that case read the buffer.  XXX: Should we control this via a flag?
+	 */
+	if (buffer == InvalidBuffer)
+	{
+		bool hit;
+
+		Assert(extended_by == 0);
+		buffer = ReadBuffer_common(eb.smgr, eb.relpersistence,
+								   fork, extend_to - 1, mode, strategy,
+								   &hit);
+	}
+
+	return buffer;
+}
 
 /*
  * ReadBuffer_common -- common logic for all ReadBuffer variants
@@ -801,35 +978,36 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	bool		found;
 	IOContext	io_context;
 	IOObject	io_object;
-	bool		isExtend;
 	bool		isLocalBuf = SmgrIsTemp(smgr);
 
 	*hit = false;
 
+	/*
+	 * Backward compatibility path, most code should use
+	 * ExtendBufferedRel() instead, as acquiring the extension lock
+	 * inside ExtendBufferedRel() scales a lot better.
+	 */
+	if (unlikely(blockNum == P_NEW))
+	{
+		uint32 flags = EB_SKIP_EXTENSION_LOCK;
+
+		Assert(mode == RBM_NORMAL || mode == RBM_ZERO_AND_LOCK);
+
+		if (mode == RBM_ZERO_AND_LOCK)
+			flags |= EB_LOCK_FIRST;
+
+		return ExtendBufferedRel(EB_SMGR(smgr, relpersistence),
+								 forkNum, strategy, flags);
+	}
+
 	/* Make sure we will have room to remember the buffer pin */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
 
-	isExtend = (blockNum == P_NEW);
-
 	TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum,
 									   smgr->smgr_rlocator.locator.spcOid,
 									   smgr->smgr_rlocator.locator.dbOid,
 									   smgr->smgr_rlocator.locator.relNumber,
-									   smgr->smgr_rlocator.backend,
-									   isExtend);
-
-	/* Substitute proper block number if caller asked for P_NEW */
-	if (isExtend)
-	{
-		blockNum = smgrnblocks(smgr, forkNum);
-		/* Fail if relation is already at maximum possible length */
-		if (blockNum == P_NEW)
-			ereport(ERROR,
-					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-					 errmsg("cannot extend relation %s beyond %u blocks",
-							relpath(smgr->smgr_rlocator, forkNum),
-							P_NEW)));
-	}
+									   smgr->smgr_rlocator.backend);
 
 	if (isLocalBuf)
 	{
@@ -843,8 +1021,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
 		if (found)
 			pgBufferUsage.local_blks_hit++;
-		else if (isExtend)
-			pgBufferUsage.local_blks_written++;
 		else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
 				 mode == RBM_ZERO_ON_ERROR)
 			pgBufferUsage.local_blks_read++;
@@ -859,8 +1035,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 							 strategy, &found, &io_context);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
-		else if (isExtend)
-			pgBufferUsage.shared_blks_written++;
 		else if (mode == RBM_NORMAL || mode == RBM_NORMAL_NO_LOG ||
 				 mode == RBM_ZERO_ON_ERROR)
 			pgBufferUsage.shared_blks_read++;
@@ -871,103 +1045,40 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	/* if it was already in the buffer pool, we're done */
 	if (found)
 	{
-		if (!isExtend)
-		{
-			/* Just need to update stats before we exit */
-			*hit = true;
-			VacuumPageHit++;
+		/* Just need to update stats before we exit */
+		*hit = true;
+		VacuumPageHit++;
 
-			if (VacuumCostActive)
-				VacuumCostBalance += VacuumCostPageHit;
+		if (VacuumCostActive)
+			VacuumCostBalance += VacuumCostPageHit;
 
-			TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
-											  smgr->smgr_rlocator.locator.spcOid,
-											  smgr->smgr_rlocator.locator.dbOid,
-											  smgr->smgr_rlocator.locator.relNumber,
-											  smgr->smgr_rlocator.backend,
-											  isExtend,
-											  found);
-
-			/*
-			 * In RBM_ZERO_AND_LOCK mode the caller expects the page to be
-			 * locked on return.
-			 */
-			if (!isLocalBuf)
-			{
-				if (mode == RBM_ZERO_AND_LOCK)
-					LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
-								  LW_EXCLUSIVE);
-				else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
-					LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
-			}
-
-			return BufferDescriptorGetBuffer(bufHdr);
-		}
+		TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
+										  smgr->smgr_rlocator.locator.spcOid,
+										  smgr->smgr_rlocator.locator.dbOid,
+										  smgr->smgr_rlocator.locator.relNumber,
+										  smgr->smgr_rlocator.backend,
+										  found);
 
 		/*
-		 * We get here only in the corner case where we are trying to extend
-		 * the relation but we found a pre-existing buffer marked BM_VALID.
-		 * This can happen because mdread doesn't complain about reads beyond
-		 * EOF (when zero_damaged_pages is ON) and so a previous attempt to
-		 * read a block beyond EOF could have left a "valid" zero-filled
-		 * buffer.  Unfortunately, we have also seen this case occurring
-		 * because of buggy Linux kernels that sometimes return an
-		 * lseek(SEEK_END) result that doesn't account for a recent write. In
-		 * that situation, the pre-existing buffer would contain valid data
-		 * that we don't want to overwrite.  Since the legitimate case should
-		 * always have left a zero-filled buffer, complain if not PageIsNew.
+		 * In RBM_ZERO_AND_LOCK mode the caller expects the page to be locked
+		 * on return.
 		 */
-		bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
-		if (!PageIsNew((Page) bufBlock))
-			ereport(ERROR,
-					(errmsg("unexpected data beyond EOF in block %u of relation %s",
-							blockNum, relpath(smgr->smgr_rlocator, forkNum)),
-					 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
-
-		/*
-		 * We *must* do smgrextend before succeeding, else the page will not
-		 * be reserved by the kernel, and the next P_NEW call will decide to
-		 * return the same page.  Clear the BM_VALID bit, do the StartBufferIO
-		 * call that BufferAlloc didn't, and proceed.
-		 */
-		if (isLocalBuf)
+		if (!isLocalBuf)
 		{
-			/* Only need to adjust flags */
-			uint32		buf_state = pg_atomic_read_u32(&bufHdr->state);
-
-			Assert(buf_state & BM_VALID);
-			buf_state &= ~BM_VALID;
-			pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
+			if (mode == RBM_ZERO_AND_LOCK)
+				LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
+							  LW_EXCLUSIVE);
+			else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
+				LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
 		}
-		else
-		{
-			/*
-			 * Loop to handle the very small possibility that someone re-sets
-			 * BM_VALID between our clearing it and StartBufferIO inspecting
-			 * it.
-			 */
-			do
-			{
-				uint32		buf_state = LockBufHdr(bufHdr);
 
-				Assert(buf_state & BM_VALID);
-				buf_state &= ~BM_VALID;
-				UnlockBufHdr(bufHdr, buf_state);
-			} while (!StartBufferIO(bufHdr, true));
-		}
+		return BufferDescriptorGetBuffer(bufHdr);
 	}
 
 	/*
 	 * if we have gotten to this point, we have allocated a buffer for the
 	 * page but its contents are not yet valid.  IO_IN_PROGRESS is set for it,
 	 * if it's a shared buffer.
-	 *
-	 * Note: if smgrextend fails, we will end up with a buffer that is
-	 * allocated but not marked BM_VALID.  P_NEW will still select the same
-	 * block number (because the relation didn't get any longer on disk) and
-	 * so future attempts to extend the relation will find the same buffer (if
-	 * it's not been recycled) but come right back here to try smgrextend
-	 * again.
 	 */
 	Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID));	/* spinlock not needed */
 
@@ -982,72 +1093,51 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		io_object = IOOBJECT_RELATION;
 	}
 
-	if (isExtend)
-	{
-		/* new buffers are zero-filled */
+	/*
+	 * Read in the page, unless the caller intends to overwrite it and just
+	 * wants us to allocate a buffer.
+	 */
+	if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
 		MemSet((char *) bufBlock, 0, BLCKSZ);
-		/* don't set checksum for all-zero page */
-		smgrextend(smgr, forkNum, blockNum, bufBlock, false);
-
-		pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
-
-		/*
-		 * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
-		 * although we're essentially performing a write. At least on linux
-		 * doing so defeats the 'delayed allocation' mechanism, leading to
-		 * increased file fragmentation.
-		 */
-	}
 	else
 	{
-		/*
-		 * Read in the page, unless the caller intends to overwrite it and
-		 * just wants us to allocate a buffer.
-		 */
-		if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
-			MemSet((char *) bufBlock, 0, BLCKSZ);
-		else
+		instr_time	io_start,
+					io_time;
+
+		if (track_io_timing)
+			INSTR_TIME_SET_CURRENT(io_start);
+
+		smgrread(smgr, forkNum, blockNum, bufBlock);
+
+		if (track_io_timing)
 		{
-			instr_time	io_start,
-						io_time;
+			INSTR_TIME_SET_CURRENT(io_time);
+			INSTR_TIME_SUBTRACT(io_time, io_start);
+			pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
+			INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
+		}
 
-			if (track_io_timing)
-				INSTR_TIME_SET_CURRENT(io_start);
+		pgstat_count_io_op(io_object, io_context, IOOP_READ);
+
+		/* check for garbage data */
+		if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
+									PIV_LOG_WARNING | PIV_REPORT_STAT))
+		{
+			if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
+			{
+				ereport(WARNING,
+						(errcode(ERRCODE_DATA_CORRUPTED),
+						 errmsg("invalid page in block %u of relation %s; zeroing out page",
+								blockNum,
+								relpath(smgr->smgr_rlocator, forkNum))));
+				MemSet((char *) bufBlock, 0, BLCKSZ);
+			}
 			else
-				INSTR_TIME_SET_ZERO(io_start);
-
-			smgrread(smgr, forkNum, blockNum, bufBlock);
-
-			pgstat_count_io_op(io_object, io_context, IOOP_READ);
-
-			if (track_io_timing)
-			{
-				INSTR_TIME_SET_CURRENT(io_time);
-				INSTR_TIME_SUBTRACT(io_time, io_start);
-				pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
-				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
-			}
-
-			/* check for garbage data */
-			if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
-										PIV_LOG_WARNING | PIV_REPORT_STAT))
-			{
-				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
-				{
-					ereport(WARNING,
-							(errcode(ERRCODE_DATA_CORRUPTED),
-							 errmsg("invalid page in block %u of relation %s; zeroing out page",
-									blockNum,
-									relpath(smgr->smgr_rlocator, forkNum))));
-					MemSet((char *) bufBlock, 0, BLCKSZ);
-				}
-				else
-					ereport(ERROR,
-							(errcode(ERRCODE_DATA_CORRUPTED),
-							 errmsg("invalid page in block %u of relation %s",
-									blockNum,
-									relpath(smgr->smgr_rlocator, forkNum))));
-			}
+				ereport(ERROR,
+						(errcode(ERRCODE_DATA_CORRUPTED),
+						 errmsg("invalid page in block %u of relation %s",
+								blockNum,
+								relpath(smgr->smgr_rlocator, forkNum))));
 		}
 	}
 
@@ -1090,7 +1180,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 									  smgr->smgr_rlocator.locator.dbOid,
 									  smgr->smgr_rlocator.locator.relNumber,
 									  smgr->smgr_rlocator.backend,
-									  isExtend,
 									  found);
 
 	return BufferDescriptorGetBuffer(bufHdr);
@@ -1640,6 +1729,350 @@ again:
 	return buf;
 }
 
+/*
+ * Limit the number of pins a batch operation may additionally acquire, to
+ * avoid running out of pinnable buffers.
+ *
+ * One additional pin is always allowed, as otherwise the operation likely
+ * cannot be performed at all.
+ *
+ * The number of allowed pins for a backend is computed based on
+ * shared_buffers and the maximum number of connections possible. That's very
+ * pessimistic, but outside of toy-sized shared_buffers it should allow
+ * sufficient pins.
+ */
+static void
+LimitAdditionalPins(uint32 *additional_pins)
+{
+	uint32		max_backends;
+	int			max_proportional_pins;
+
+	if (*additional_pins <= 1)
+		return;
+
+	max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
+	max_proportional_pins = NBuffers / max_backends;
+
+	/*
+	 * Subtract the approximate number of buffers already pinned by this
+	 * backend. We get the number of "overflowed" pins for free, but don't
+	 * know the number of pins in PrivateRefCountArray. The cost of
+	 * calculating that exactly doesn't seem worth it, so just assume the max.
+	 */
+	max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
+
+	if (max_proportional_pins <= 0)	/* zero must also be raised to one pin */
+		max_proportional_pins = 1;
+
+	if (*additional_pins > (uint32) max_proportional_pins)
+		*additional_pins = max_proportional_pins;
+}
+
+static BlockNumber
+ExtendBufferedRelInternal(ExtendBufferedWhat eb,
+						  ForkNumber fork,
+						  BufferAccessStrategy strategy,
+						  uint32 flags,
+						  uint32 extend_by,
+						  BlockNumber extend_upto,
+						  Buffer *buffers,
+						  uint32 *extended_by)
+{
+	BlockNumber first_block;	/* relation size as seen by the callee */
+
+	TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork,
+										 eb.smgr->smgr_rlocator.locator.spcOid,
+										 eb.smgr->smgr_rlocator.locator.dbOid,
+										 eb.smgr->smgr_rlocator.locator.relNumber,
+										 eb.smgr->smgr_rlocator.backend,
+										 extend_by);
+
+	if (eb.relpersistence == RELPERSISTENCE_TEMP)
+		first_block = ExtendBufferedRelLocal(eb, fork, flags,
+											 extend_by, extend_upto,
+											 buffers, &extend_by);
+	else
+		first_block = ExtendBufferedRelShared(eb, fork, strategy, flags,
+											  extend_by, extend_upto,
+											  buffers, &extend_by);
+	*extended_by = extend_by;	/* callee may have extended by fewer blocks */
+
+	TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork,
+										eb.smgr->smgr_rlocator.locator.spcOid,
+										eb.smgr->smgr_rlocator.locator.dbOid,
+										eb.smgr->smgr_rlocator.locator.relNumber,
+										eb.smgr->smgr_rlocator.backend,
+										*extended_by,
+										first_block);
+
+	return first_block;
+}
+
+/*
+ * Implementation of ExtendBufferedRelBy() and ExtendBufferedRelTo() for
+ * shared buffers.  Returns the first new block; sets *extended_by.
+ */
+static BlockNumber
+ExtendBufferedRelShared(ExtendBufferedWhat eb,
+						ForkNumber fork,
+						BufferAccessStrategy strategy,
+						uint32 flags,
+						uint32 extend_by,
+						BlockNumber extend_upto,
+						Buffer *buffers,
+						uint32 *extended_by)
+{
+	BlockNumber first_block;
+	IOContext	io_context = IOContextForStrategy(strategy);
+
+	LimitAdditionalPins(&extend_by);
+
+	/*
+	 * Acquire victim buffers for extension without holding extension lock.
+	 * Writing out victim buffers is the most expensive part of extending the
+	 * relation, particularly when doing so requires WAL flushes. Zeroing out
+	 * the buffers is also quite expensive, so do that before holding the
+	 * extension lock as well.
+	 *
+	 * These pages are pinned by us and not valid. While we hold the pin they
+	 * can't be acquired as victim buffers by another backend.
+	 */
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		Block		buf_block;
+
+		buffers[i] = GetVictimBuffer(strategy, io_context);
+		buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
+
+		/* new buffers are zero-filled */
+		MemSet((char *) buf_block, 0, BLCKSZ);
+	}
+
+	/*
+	 * Lock relation against concurrent extensions, unless requested not to.
+	 *
+	 * We use the same extension lock for all forks. That's unnecessarily
+	 * restrictive, but currently extensions for forks don't happen often
+	 * enough to make it worth locking more granularly.
+	 *
+	 * Note that another backend might have extended the relation by the time
+	 * we get the lock.
+	 */
+	if (!(flags & EB_SKIP_EXTENSION_LOCK))
+	{
+		LockRelationForExtension(eb.rel, ExclusiveLock);
+		eb.smgr = RelationGetSmgr(eb.rel);
+	}
+
+	/*
+	 * If requested, invalidate size cache, so that smgrnblocks asks the
+	 * kernel.
+	 */
+	if (flags & EB_CLEAR_SIZE_CACHE)
+		eb.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber;
+
+	first_block = smgrnblocks(eb.smgr, fork);
+
+	if (extend_upto != InvalidBlockNumber)
+	{
+		uint32 old_num_pages = extend_by;
+
+		if (first_block > extend_upto)
+			extend_by = 0;
+		else if ((uint64) first_block + extend_by > extend_upto)
+			extend_by = extend_upto - first_block;
+
+		for (uint32 i = extend_by; i < old_num_pages; i++)
+		{
+			BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1);
+
+			/*
+			 * The victim buffer we acquired previously is clean and unused,
+			 * let it be found again quickly
+			 */
+			StrategyFreeBuffer(buf_hdr);
+			UnpinBuffer(buf_hdr);
+		}
+
+		if (extend_by == 0)
+		{
+			if (!(flags & EB_SKIP_EXTENSION_LOCK))
+				UnlockRelationForExtension(eb.rel, ExclusiveLock);
+			*extended_by = extend_by;
+			return first_block;
+		}
+	}
+
+	/* Fail if relation is already at maximum possible length */
+	if ((uint64) first_block + extend_by >= MaxBlockNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot extend relation %s beyond %u blocks",
+						relpath(eb.smgr->smgr_rlocator, fork),
+						MaxBlockNumber)));
+
+	/*
+	 * Insert buffers into buffer table, mark as IO_IN_PROGRESS.
+	 *
+	 * This needs to happen before we extend the relation, because as soon as
+	 * we do, other backends can start to read in those pages.
+	 */
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		Buffer		victim_buf = buffers[i];
+		BufferDesc *victim_buf_hdr = GetBufferDescriptor(victim_buf - 1);
+		BufferTag	tag;
+		uint32		hash;
+		LWLock	   *partition_lock;
+		int			existing_id;
+
+		InitBufferTag(&tag, &eb.smgr->smgr_rlocator.locator, fork, first_block + i);
+		hash = BufTableHashCode(&tag);
+		partition_lock = BufMappingPartitionLock(hash);
+
+		LWLockAcquire(partition_lock, LW_EXCLUSIVE);
+
+		existing_id = BufTableInsert(&tag, hash, victim_buf_hdr->buf_id);
+
+		/*
+		 * We get here only in the corner case where we are trying to extend
+		 * the relation but we found a pre-existing buffer. This can happen
+		 * because a prior attempt at extending the relation failed, and
+		 * because mdread doesn't complain about reads beyond EOF (when
+		 * zero_damaged_pages is ON) and so a previous attempt to read a block
+		 * beyond EOF could have left a "valid" zero-filled buffer.
+		 * Unfortunately, we have also seen this case occurring because of
+		 * buggy Linux kernels that sometimes return an lseek(SEEK_END) result
+		 * that doesn't account for a recent write. In that situation, the
+		 * pre-existing buffer would contain valid data that we don't want to
+		 * overwrite.  Since the legitimate cases should always have left a
+		 * zero-filled buffer, complain if not PageIsNew.
+		 */
+		if (existing_id >= 0)
+		{
+			BufferDesc *existing_hdr = GetBufferDescriptor(existing_id);
+			Block		buf_block;
+			bool		valid;
+
+			/*
+			 * Pin the existing buffer before releasing the partition lock,
+			 * preventing it from being evicted.
+			 */
+			valid = PinBuffer(existing_hdr, strategy);
+
+			LWLockRelease(partition_lock);
+
+			/*
+			 * The victim buffer we acquired previously is clean and unused,
+			 * let it be found again quickly
+			 */
+			StrategyFreeBuffer(victim_buf_hdr);
+			UnpinBuffer(victim_buf_hdr);
+
+			buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
+			buf_block = BufHdrGetBlock(existing_hdr);
+
+			if (valid && !PageIsNew((Page) buf_block))
+				ereport(ERROR,
+						(errmsg("unexpected data beyond EOF in block %u of relation %s",
+								existing_hdr->tag.blockNum, relpath(eb.smgr->smgr_rlocator, fork)),
+						 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));
+
+			/*
+			 * We *must* do smgr[zero]extend before succeeding, else the page
+			 * will not be reserved by the kernel, and the next P_NEW call
+			 * will decide to return the same page.  Clear the BM_VALID bit,
+			 * do StartBufferIO() and proceed.
+			 *
+			 * Loop to handle the very small possibility that someone re-sets
+			 * BM_VALID between our clearing it and StartBufferIO inspecting
+			 * it.
+			 */
+			do
+			{
+				uint32		buf_state = LockBufHdr(existing_hdr);
+
+				buf_state &= ~BM_VALID;
+				UnlockBufHdr(existing_hdr, buf_state);
+			} while (!StartBufferIO(existing_hdr, true));
+		}
+		else
+		{
+			uint32		buf_state;
+
+			buf_state = LockBufHdr(victim_buf_hdr);
+
+			/* some sanity checks while we hold the buffer header lock */
+			Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
+			Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 1);
+
+			victim_buf_hdr->tag = tag;
+
+			buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
+			if (eb.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM)
+				buf_state |= BM_PERMANENT;
+
+			UnlockBufHdr(victim_buf_hdr, buf_state);
+
+			LWLockRelease(partition_lock);
+
+			/* XXX: could combine the locked operations in it with the above */
+			StartBufferIO(victim_buf_hdr, true);
+		}
+	}
+
+	/*
+	 * Note: if smgrzeroextend fails, we will end up with buffers that are
+	 * allocated but not marked BM_VALID.  The next relation extension will
+	 * still select the same block number (because the relation didn't get any
+	 * longer on disk) and so future attempts to extend the relation will find
+	 * the same buffers (if they have not been recycled) but come right back
+	 * here to try smgrzeroextend again.
+	 *
+	 * We don't need to set checksum for all-zero pages.
+	 */
+	smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
+
+	/*
+	 * Release the file-extension lock; it's now OK for someone else to extend
+	 * the relation some more.  We remove IO_IN_PROGRESS after this, as zeroing
+	 * the buffer contents and waking up waiting backends can take noticeable
+	 * time.
+	 */
+	if (!(flags & EB_SKIP_EXTENSION_LOCK))
+		UnlockRelationForExtension(eb.rel, ExclusiveLock);
+
+	/* Set BM_VALID, terminate IO, and wake up any waiters */
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		Buffer		buf = buffers[i];
+		BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
+		bool		lock = false;
+
+		if (flags & EB_LOCK_FIRST && i == 0)
+			lock = true;
+		else if (flags & EB_LOCK_TARGET)
+		{
+			Assert(extend_upto != InvalidBlockNumber);
+			if (first_block + i + 1 == extend_upto)
+				lock = true;
+		}
+
+		if (lock)
+			LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
+
+		TerminateBufferIO(buf_hdr, false, BM_VALID);
+	}
+
+	pgBufferUsage.shared_blks_written += extend_by;
+	pgstat_count_io_op_n(IOOBJECT_RELATION, io_context, IOOP_EXTEND,
+						 extend_by);
+
+	*extended_by = extend_by;
+
+	return first_block;
+}
+
 /*
  * MarkBufferDirty
  *
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 5b44b0be8b5..0528fddf992 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -49,6 +49,9 @@ static int	nextFreeLocalBufId = 0;
 
 static HTAB *LocalBufHash = NULL;
 
+/* number of local buffers pinned at least once */
+static int	NLocalPinnedBuffers = 0;
+
 
 static void InitLocalBuffers(void);
 static Block GetLocalBufferStorage(void);
@@ -280,6 +283,154 @@ GetLocalVictimBuffer(void)
 	return BufferDescriptorGetBuffer(bufHdr);
 }
 
+/* Cap *additional_pins at the free local pins; see LimitAdditionalPins() */
+static void
+LimitAdditionalLocalPins(uint32 *additional_pins)
+{
+	uint32		max_pins;
+
+	if (*additional_pins <= 1)
+		return;
+
+	/*
+	 * Unlike LimitAdditionalPins(), other backends play no role here: up to
+	 * NLocBuffer pins are allowed in total.  Requests of <= 1 are not capped.
+	 */
+	max_pins = (NLocBuffer - NLocalPinnedBuffers);
+
+	if (*additional_pins >= max_pins)
+		*additional_pins = max_pins;
+}
+
+/*
+ * Implementation of ExtendBufferedRelBy() and ExtendBufferedRelTo() for
+ * temporary buffers.
+ *
+ * Extends fork 'fork' of eb.smgr by up to extend_by zero-filled blocks; the
+ * count may first be reduced by LimitAdditionalLocalPins().  The buffers for
+ * the new blocks are stored in buffers[], the number of blocks added in
+ * *extended_by, and the first new block number is returned.  extend_upto,
+ * unless InvalidBlockNumber, is only cross-checked by assertions, and flags
+ * is not examined here.
+ */
+BlockNumber
+ExtendBufferedRelLocal(ExtendBufferedWhat eb,
+					   ForkNumber fork,
+					   uint32 flags,
+					   uint32 extend_by,
+					   BlockNumber extend_upto,
+					   Buffer *buffers,
+					   uint32 *extended_by)
+{
+	BlockNumber first_block;
+
+	/* Initialize local buffers if first request in this session */
+	if (LocalBufHash == NULL)
+		InitLocalBuffers();
+
+	LimitAdditionalLocalPins(&extend_by);
+
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		BufferDesc *buf_hdr;
+		Block		buf_block;
+
+		buffers[i] = GetLocalVictimBuffer();
+		buf_hdr = GetLocalBufferDescriptor(-(buffers[i] + 1));
+		buf_block = LocalBufHdrGetBlock(buf_hdr);
+
+		/* new buffers are zero-filled */
+		MemSet((char *) buf_block, 0, BLCKSZ);
+	}
+
+	first_block = smgrnblocks(eb.smgr, fork);
+
+	if (extend_upto != InvalidBlockNumber)
+	{
+		/*
+		 * In contrast to shared relations, nothing could change the relation
+		 * size concurrently. Thus we shouldn't end up finding that we don't
+		 * need to do anything.
+		 */
+		Assert(first_block <= extend_upto);
+		Assert((uint64) first_block + extend_by <= extend_upto);
+	}
+
+	/* Fail if relation is already at maximum possible length */
+	if ((uint64) first_block + extend_by >= MaxBlockNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot extend relation %s beyond %u blocks",
+						relpath(eb.smgr->smgr_rlocator, fork),
+						MaxBlockNumber)));
+
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		int			victim_buf_id = -(buffers[i] + 1);
+		BufferDesc *victim_buf_hdr = GetLocalBufferDescriptor(victim_buf_id);
+		BufferTag	tag;
+		LocalBufferLookupEnt *hresult;
+		bool		found;
+
+		InitBufferTag(&tag, &eb.smgr->smgr_rlocator.locator, fork, first_block + i);
+
+		hresult = (LocalBufferLookupEnt *)
+			hash_search(LocalBufHash, (void *) &tag, HASH_ENTER, &found);
+		if (found)
+		{
+			BufferDesc *existing_hdr = GetLocalBufferDescriptor(hresult->id);
+			uint32		buf_state;
+
+			/* the block already has a buffer: use it, give back the victim */
+			UnpinLocalBuffer(BufferDescriptorGetBuffer(victim_buf_hdr));
+			PinLocalBuffer(existing_hdr, false);
+			buffers[i] = BufferDescriptorGetBuffer(existing_hdr);
+
+			buf_state = pg_atomic_read_u32(&existing_hdr->state);
+			Assert(buf_state & BM_TAG_VALID);
+			Assert(!(buf_state & BM_DIRTY));
+			/*
+			 * Clear only BM_VALID; every other state bit, BM_TAG_VALID
+			 * included, has to survive.  BM_VALID is set again below once
+			 * the relation has been extended.
+			 */
+			buf_state &= ~BM_VALID;
+			pg_atomic_unlocked_write_u32(&existing_hdr->state, buf_state);
+		}
+		else
+		{
+			uint32		buf_state = pg_atomic_read_u32(&victim_buf_hdr->state);
+
+			Assert(!(buf_state & (BM_VALID | BM_TAG_VALID | BM_DIRTY | BM_JUST_DIRTIED)));
+
+			victim_buf_hdr->tag = tag;
+			buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
+			pg_atomic_unlocked_write_u32(&victim_buf_hdr->state, buf_state);
+			hresult->id = victim_buf_id;
+		}
+	}
+
+	/* actually extend relation */
+	smgrzeroextend(eb.smgr, fork, first_block, extend_by, false);
+
+	for (uint32 i = 0; i < extend_by; i++)
+	{
+		BufferDesc *buf_hdr = GetLocalBufferDescriptor(-(buffers[i] + 1));
+		uint32		buf_state = pg_atomic_read_u32(&buf_hdr->state);
+
+		buf_state |= BM_VALID;
+		pg_atomic_unlocked_write_u32(&buf_hdr->state, buf_state);
+	}
+
+	*extended_by = extend_by;
+
+	pgBufferUsage.temp_blks_written += extend_by;
+	pgstat_count_io_op_n(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND,
+						 extend_by);
+
+	return first_block;
+}
+
 /*
  * MarkLocalBufferDirty -
  *	  mark a local buffer dirty
@@ -494,6 +645,7 @@ PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
 
 	if (LocalRefCount[bufid] == 0)
 	{
+		NLocalPinnedBuffers++;
 		if (adjust_usagecount &&
 			BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
 		{
@@ -515,9 +667,11 @@ UnpinLocalBuffer(Buffer buffer)
 
 	Assert(BufferIsLocal(buffer));
 	Assert(LocalRefCount[buffid] > 0);
+	Assert(NLocalPinnedBuffers > 0);
 
 	ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
-	LocalRefCount[buffid]--;
+	if (--LocalRefCount[buffid] == 0)
+		NLocalPinnedBuffers--;
 }
 
 /*
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index af5d5546101..f2f6eae8031 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -64,13 +64,19 @@ pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 
 void
 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
+{
+	pgstat_count_io_op_n(io_object, io_context, io_op, 1);
+}
+
+void
+pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
 {
 	Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
 	Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
 	Assert((unsigned int) io_op < IOOP_NUM_TYPES);
 	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
 
-	PendingIOStats.data[io_object][io_context][io_op]++;
+	PendingIOStats.data[io_object][io_context][io_op] += cnt;
 
 	have_iostats = true;
 }
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index c064d679e94..fd3df2f7900 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -55,10 +55,12 @@ provider postgresql {
 	probe sort__start(int, bool, int, int, bool, int);
 	probe sort__done(bool, long);
 
-	probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool);
-	probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool);
+	probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int);
+	probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool);
 	probe buffer__flush__start(ForkNumber, BlockNumber, Oid, Oid, Oid);
 	probe buffer__flush__done(ForkNumber, BlockNumber, Oid, Oid, Oid);
+	probe buffer__extend__start(ForkNumber, Oid, Oid, Oid, int, unsigned int);
+	probe buffer__extend__done(ForkNumber, Oid, Oid, Oid, int, unsigned int, BlockNumber);
 
 	probe buffer__checkpoint__start(int);
 	probe buffer__checkpoint__sync__start();
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 77bb13ba0a7..62a7dd2785c 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -7713,33 +7713,52 @@ FROM pg_stat_get_backend_idset() AS backendid;
      <entry>Probe that fires when the two-phase portion of a checkpoint is
       complete.</entry>
     </row>
+    <row>
+     <entry><literal>buffer-extend-start</literal></entry>
+     <entry><literal>(ForkNumber, Oid, Oid, Oid, int, unsigned int)</literal></entry>
+     <entry>Probe that fires when a relation extension starts.
+       arg0 contains the fork to be extended. arg1, arg2, and arg3 contain the
+       tablespace, database, and relation OIDs identifying the relation.  arg4
+       is the ID of the backend which created the temporary relation for a
+       local buffer, or <symbol>InvalidBackendId</symbol> (-1) for a shared
+       buffer. arg5 is the number of blocks the caller would like to extend
+       by.</entry>
+    </row>
+    <row>
+     <entry><literal>buffer-extend-done</literal></entry>
+     <entry><literal>(ForkNumber, Oid, Oid, Oid, int, unsigned int, BlockNumber)</literal></entry>
+     <entry>Probe that fires when a relation extension is complete.
+       arg0 contains the fork to be extended. arg1, arg2, and arg3 contain the
+       tablespace, database, and relation OIDs identifying the relation.  arg4
+       is the ID of the backend which created the temporary relation for a
+       local buffer, or <symbol>InvalidBackendId</symbol> (-1) for a shared
+       buffer. arg5 is the number of blocks the relation was extended by; this
+       can be less than the number in the
+       <literal>buffer-extend-start</literal> due to resource
+       constraints. arg6 contains the BlockNumber of the first new
+       block.</entry>
+    </row>
     <row>
      <entry><literal>buffer-read-start</literal></entry>
-     <entry><literal>(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool)</literal></entry>
+     <entry><literal>(ForkNumber, BlockNumber, Oid, Oid, Oid, int)</literal></entry>
      <entry>Probe that fires when a buffer read is started.
-      arg0 and arg1 contain the fork and block numbers of the page (but
-      arg1 will be -1 if this is a relation extension request).
+      arg0 and arg1 contain the fork and block numbers of the page.
       arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs
       identifying the relation.
       arg5 is the ID of the backend which created the temporary relation for a
       local buffer, or <symbol>InvalidBackendId</symbol> (-1) for a shared buffer.
-      arg6 is true for a relation extension request, false for normal
-      read.</entry>
+      </entry>
     </row>
     <row>
      <entry><literal>buffer-read-done</literal></entry>
-     <entry><literal>(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool)</literal></entry>
+     <entry><literal>(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool)</literal></entry>
      <entry>Probe that fires when a buffer read is complete.
-      arg0 and arg1 contain the fork and block numbers of the page (if this
-      is a relation extension request, arg1 now contains the block number
-      of the newly added block).
+      arg0 and arg1 contain the fork and block numbers of the page.
       arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs
       identifying the relation.
       arg5 is the ID of the backend which created the temporary relation for a
       local buffer, or <symbol>InvalidBackendId</symbol> (-1) for a shared buffer.
-      arg6 is true for a relation extension request, false for normal
-      read.
-      arg7 is true if the buffer was found in the pool, false if not.</entry>
+      arg6 is true if the buffer was found in the pool, false if not.</entry>
     </row>
     <row>
      <entry><literal>buffer-flush-start</literal></entry>
-- 
2.38.0

