From a63818a32d661dba563cedfdb85731e522b3c6a9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 24 Nov 2022 13:28:22 +1300
Subject: [PATCH 2/2] Try to tolerate concurrent reads and writes of control
 file.

Various frontend programs and SQL-callable backend functions read the
control file without any kind of interlocking against concurrent writes.
Linux ext4 doesn't implement the atomicity required by POSIX here, so a
concurrent reader can see only partial effects of an in-progress write.

Tolerate this by retrying whenever the checksum doesn't match, until
either the checksum is valid or two reads in a row produce the same
checksum, after an idea from Tom Lane.

Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de
---
 src/common/controldata_utils.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index 2d1f35bbd1..200d24df02 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -56,12 +56,19 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 	char		ControlFilePath[MAXPGPATH];
 	pg_crc32c	crc;
 	int			r;
+	bool		first_try;
+	pg_crc32c	last_crc;
 
 	Assert(crc_ok_p);
 
 	ControlFile = palloc_object(ControlFileData);
 	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
 
+	first_try = true;
+	INIT_CRC32C(last_crc);
+
+retry:
+
 #ifndef FRONTEND
 	if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
 		ereport(ERROR,
@@ -117,6 +124,24 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 
 	*crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
 
+	/*
+	 * With unlucky timing on filesystems that don't implement atomicity of
+	 * concurrent reads and writes (such as Linux ext4), we might have seen
+	 * garbage if the server was writing to the file at the same time.  Retry
+	 * until the CRC is valid or we see the same bad CRC twice in a row.
+	 */
+	if (!*crc_ok_p && (first_try || !EQ_CRC32C(crc, last_crc)))
+	{
+		first_try = false;
+		last_crc = crc;
+		pg_usleep(10000);
+
+#ifndef FRONTEND
+		CHECK_FOR_INTERRUPTS();
+#endif
+		goto retry;
+	}
+
 	/* Make sure the control file is valid byte order. */
 	if (ControlFile->pg_control_version % 65536 == 0 &&
 		ControlFile->pg_control_version / 65536 != 0)
-- 
2.35.1

