From 944e7c4e27fca5589b8a103f7f470df23a5161c2 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <ams@2ndQuadrant.com>
Date: Wed, 24 Sep 2014 16:01:37 +0530
Subject: =?UTF-8?q?Make=20ResetUnloggedRelations(=E2=80=A6=5FINIT)=20fsync?=
 =?UTF-8?q?=20newly-created=20main=20forks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During UNLOGGED_RELATION_INIT, after we have copied ${x}_init to $x, we
now issue fsync()s for the newly-created files and for the directory
containing them. We depend on their existence, and a checkpoint isn't
going to fsync them for us during recovery.

See thread from 20140912112246.GA4984@alap3.anarazel.de for details.
---
 src/backend/storage/file/reinit.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 3229f41..4ad5987 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -339,6 +339,44 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 		}
 
 		FreeDir(dbspace_dir);
+
+		/*
+		 * copy_file() above has already called pg_flush_data() on the
+		 * files it created. Now fsync those files, and then the directory
+		 * itself, because a checkpoint won't do it for us while we're in
+		 * recovery. We do this in a separate pass to allow the kernel to
+		 * perform all the flushes at once.
+		 */
+
+		dbspace_dir = AllocateDir(dbspacedirname);
+		while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
+		{
+			ForkNumber forkNum;
+			int			oidchars;
+			char		oidbuf[OIDCHARS + 1];
+			char		mainpath[MAXPGPATH];
+
+			/* Skip anything that doesn't look like a relation data file. */
+			if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
+													 &forkNum))
+				continue;
+
+			/* Also skip it unless this is the init fork. */
+			if (forkNum != INIT_FORKNUM)
+				continue;
+
+			/* Construct main fork pathname. */
+			memcpy(oidbuf, de->d_name, oidchars);
+			oidbuf[oidchars] = '\0';
+			snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
+					 dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
+					 strlen(forkNames[INIT_FORKNUM]));
+
+			fsync_fname(mainpath, false);
+		}
+		FreeDir(dbspace_dir);
+
+		fsync_fname((char *)dbspacedirname, true);
 	}
 }
 
-- 
1.9.1

