From 6fffe00e39ec837cb08afb57bce413b8fad456ed Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Tue, 17 Mar 2020 17:26:41 +1300
Subject: [PATCH v6 5/8] Allow PrefetchBuffer() to report what happened.

Report whether a prefetch was actually initiated due to a cache miss, so
that callers can limit the number of concurrent I/Os they try to issue,
without counting the prefetch calls that did nothing because the page
was already in our buffers.

If the requested block was already cached, return a valid buffer.  This
might enable future code to avoid a buffer mapping lookup, though it
will need to recheck the buffer before using it because it's not pinned
so could be reclaimed at any time.

Report neither hit nor miss when a relation's backing file is missing,
to prepare for use during recovery.  This will be used to handle cases
of relations that are referenced in the WAL but have been unlinked
already due to actions covered by WAL records that haven't been replayed
yet, after a crash.

Reviewed-by: Alvaro Herrera <alvherre@2ndquadrant.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKGJ4VJN8ttxScUFM8dOKX0BrBiboo5uz1cq%3DAovOddfHpA%40mail.gmail.com
---
 src/backend/storage/buffer/bufmgr.c   | 57 +++++++++++++++++++++------
 src/backend/storage/buffer/localbuf.c | 18 ++++++---
 src/backend/storage/smgr/md.c         |  9 ++++-
 src/backend/storage/smgr/smgr.c       | 10 +++--
 src/include/storage/buf_internals.h   |  5 ++-
 src/include/storage/bufmgr.h          | 19 ++++++---
 src/include/storage/md.h              |  2 +-
 src/include/storage/smgr.h            |  2 +-
 8 files changed, 90 insertions(+), 32 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 22087a1c3c..23f269ae74 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -483,14 +483,14 @@ static int	ts_ckpt_progress_comparator(Datum a, Datum b, void *arg);
 /*
  * Implementation of PrefetchBuffer() for shared buffers.
  */
-void
+PrefetchBufferResult
 PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
 					 ForkNumber forkNum,
 					 BlockNumber blockNum)
 {
-#ifdef USE_PREFETCH
-	BufferTag	newTag;		/* identity of requested block */
-	uint32		newHash;	/* hash value for newTag */
+	PrefetchBufferResult result = {InvalidBuffer, false};
+	BufferTag	newTag;			/* identity of requested block */
+	uint32		newHash;		/* hash value for newTag */
 	LWLock	   *newPartitionLock;	/* buffer partition lock for it */
 	int			buf_id;
 
@@ -511,7 +511,25 @@ PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
 
 	/* If not in buffers, initiate prefetch */
 	if (buf_id < 0)
-		smgrprefetch(smgr_reln, forkNum, blockNum);
+	{
+#ifdef USE_PREFETCH
+		/*
+		 * Try to initiate an asynchronous read.  This returns false in
+		 * recovery if the relation file doesn't exist.
+		 */
+		if (smgrprefetch(smgr_reln, forkNum, blockNum))
+			result.initiated_io = true;
+#endif							/* USE_PREFETCH */
+	}
+	else
+	{
+		/*
+		 * Report the buffer it was in at that time.  The caller may be able
+		 * to avoid a buffer table lookup, but it's not pinned and it must be
+		 * rechecked!
+		 */
+		result.recent_buffer = buf_id + 1;
+	}
 
 	/*
 	 * If the block *is* in buffers, we do nothing.  This is not really ideal:
@@ -524,7 +542,8 @@ PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
 	 * additional per-buffer state, and it's not clear that there's enough of
 	 * a problem to justify that.
 	 */
-#endif							/* USE_PREFETCH */
+
+	return result;
 }
 
 /*
@@ -533,12 +552,27 @@ PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
  * This is named by analogy to ReadBuffer but doesn't actually allocate a
  * buffer.  Instead it tries to ensure that a future ReadBuffer for the given
  * block will not be delayed by the I/O.  Prefetching is optional.
- * No-op if prefetching isn't compiled in.
+ *
+ * There are three possible outcomes:
+ *
+ * 1.  If the block is already cached, the result includes a valid buffer that
+ * could be used by the caller to avoid the need for a later buffer lookup, but
+ * it's not pinned, so the caller must recheck it.
+ *
+ * 2.  If the kernel has been asked to initiate I/O, the initiated_io member is
+ * true.  Currently there is no way to know if the data was already cached by
+ * the kernel and therefore didn't really initiate I/O, and no way to know when
+ * the I/O completes other than using synchronous ReadBuffer().
+ *
+ * 3.  Otherwise, the buffer wasn't already cached by PostgreSQL, and either
+ * USE_PREFETCH is not defined (this build doesn't support prefetching due to
+ * lack of a kernel facility), or the underlying relation file wasn't found and
+ * we are in recovery.  (If the relation file isn't found and we are not in
+ * recovery, an error is raised).
  */
-void
+PrefetchBufferResult
 PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 {
-#ifdef USE_PREFETCH
 	Assert(RelationIsValid(reln));
 	Assert(BlockNumberIsValid(blockNum));
 
@@ -554,14 +588,13 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
 					 errmsg("cannot access temporary tables of other sessions")));
 
 		/* pass it off to localbuf.c */
-		PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum);
+		return PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum);
 	}
 	else
 	{
 		/* pass it to the shared buffer version */
-		PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum);
+		return PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum);
 	}
-#endif							/* USE_PREFETCH */
 }
 
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index b528bc9553..1614ca03ea 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -60,11 +60,11 @@ static Block GetLocalBufferStorage(void);
  * Do PrefetchBuffer's work for temporary relations.
  * No-op if prefetching isn't compiled in.
  */
-void
+PrefetchBufferResult
 PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
 					BlockNumber blockNum)
 {
-#ifdef USE_PREFETCH
+	PrefetchBufferResult result = { InvalidBuffer, false };
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
 
@@ -81,12 +81,18 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
 	if (hresult)
 	{
 		/* Yes, so nothing to do */
-		return;
+		result.recent_buffer = -hresult->id - 1;
 	}
-
-	/* Not in buffers, so initiate prefetch */
-	smgrprefetch(smgr, forkNum, blockNum);
+	else
+	{
+#ifdef USE_PREFETCH
+		/* Not in buffers, so initiate prefetch */
+		smgrprefetch(smgr, forkNum, blockNum);
+		result.initiated_io = true;
 #endif							/* USE_PREFETCH */
+	}
+
+	return result;
 }
 
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index ee9822c6e1..e0b020da11 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -524,14 +524,17 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 /*
  *	mdprefetch() -- Initiate asynchronous read of the specified block of a relation
  */
-void
+bool
 mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
 #ifdef USE_PREFETCH
 	off_t		seekpos;
 	MdfdVec    *v;
 
-	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
+	v = _mdfd_getseg(reln, forknum, blocknum, false,
+					 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
+	if (v == NULL)
+		return false;
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
@@ -539,6 +542,8 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
 	(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
 #endif							/* USE_PREFETCH */
+
+	return true;
 }
 
 /*
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 72c9696ad1..b053a4dc76 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -49,7 +49,7 @@ typedef struct f_smgr
 								bool isRedo);
 	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
 								BlockNumber blocknum, char *buffer, bool skipFsync);
-	void		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
 								  BlockNumber blocknum);
 	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
 							  BlockNumber blocknum, char *buffer);
@@ -524,11 +524,15 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 /*
  *	smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
+ *
+ *		In recovery only, this can return false to indicate that a file
+ *		doesn't exist (presumably it has been dropped by a later WAL
+ *		record).
  */
-void
+bool
 smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
-	smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
+	return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
 }
 
 /*
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 166fe334c7..e57f84ee9c 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -327,8 +327,9 @@ extern int	BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
 extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
 
 /* localbuf.c */
-extern void PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
-								BlockNumber blockNum);
+extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
+												ForkNumber forkNum,
+												BlockNumber blockNum);
 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 									BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 39660aacba..ee91b8fa26 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -46,6 +46,15 @@ typedef enum
 								 * replay; otherwise same as RBM_NORMAL */
 } ReadBufferMode;
 
+/*
+ * Type returned by PrefetchBuffer().
+ */
+typedef struct PrefetchBufferResult
+{
+	Buffer		recent_buffer;	/* If valid, a hit (recheck needed!) */
+	bool		initiated_io;	/* If true, a miss resulting in async I/O */
+} PrefetchBufferResult;
+
 /* forward declared, to avoid having to expose buf_internals.h here */
 struct WritebackContext;
 
@@ -162,11 +171,11 @@ extern PGDLLIMPORT int32 *LocalRefCount;
 /*
  * prototypes for functions in bufmgr.c
  */
-extern void PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
-								 ForkNumber forkNum,
-								 BlockNumber blockNum);
-extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
-						   BlockNumber blockNum);
+extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
+												 ForkNumber forkNum,
+												 BlockNumber blockNum);
+extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
+										   BlockNumber blockNum);
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
 extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 								 BlockNumber blockNum, ReadBufferMode mode,
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index ec7630ce3b..07fd1bb7d0 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -28,7 +28,7 @@ extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
 extern void mdextend(SMgrRelation reln, ForkNumber forknum,
 					 BlockNumber blocknum, char *buffer, bool skipFsync);
-extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
+extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum);
 extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				   char *buffer);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 79dfe0e373..bb8428f27f 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -93,7 +93,7 @@ extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
 					   BlockNumber blocknum, char *buffer, bool skipFsync);
-extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
+extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum);
 extern void smgrread(SMgrRelation reln, ForkNumber forknum,
 					 BlockNumber blocknum, char *buffer);
-- 
2.20.1

