Fix some error handling for read() and errno

Started by Michael Paquierover 7 years ago26 messages
#1Michael Paquier
michael@paquier.xyz
1 attachment(s)

Hi all,

This is basically a new thread after what has been discussed for
pg_controldata with its error handling for read():
/messages/by-id/CABUevEx8ZRV5Ut_FvP2etXiQppx3xVzm7oOaV3AcdHxX81Yt8Q@mail.gmail.com

While reviewing the core code, I have noticed similar weird error
handling for read(). At the same time, some of those places may use an
incorrect errno, as an error is invoked using an errno which may be
overwritten by another system call. I found a funny one in slru.c,
for which I have added a note in the patch. I don't think that this is
worth addressing with more facility, thoughts are welcome.

Attached is a patch addressing the issues I found.

Thanks,
--
Michael

Attachments:

read-errno-handling.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 87942b4cca..d487347cc6 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -683,6 +683,11 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
 	if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
 	{
+		/*
+		 * XXX: Note that this may actually report sucess if the number
+		 * of bytes read is positive, but lacking data so that errno is
+		 * not set.
+		 */
 		pgstat_report_wait_end();
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 65194db70e..52f634e706 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1272,15 +1273,28 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
+		int		save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
 		if (give_warnings)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+		{
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read two-phase state file \"%s\": %m",
+								path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read two-phase state file \"%s\": %d read, expected %zu",
+								path, r, stat.st_size)));
+		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..72fd800646 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3395,21 +3395,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int		r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("not enough data in file \"%s\": read %d, expected %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -11594,6 +11597,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11675,8 +11679,10 @@ retry:
 	if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 				(errcode_for_file_access(),
 				 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11685,16 +11691,29 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: read %d, expected %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
+
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 963878a5d8..c39d023d22 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -697,9 +697,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %d read of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 056628fe8e..cc64ff22ac 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1394,11 +1394,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1434,10 +1438,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..c4839b11f6 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -659,11 +660,19 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read relation mapping file \"%s\": %m",
+							mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read relation mapping file \"%s\": %d read, expected %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..174ca7216d 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,17 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d, expected %d\n",
+					 fullpath, r, len);
+
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..4f27cfe42b 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d, expected %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..64391a7e5c 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("not enough data in file \"%s\": %d read, %d expected",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
#2Kyotaro HORIGUCHI
horiguchi.kyotaro@lab.ntt.co.jp
In reply to: Michael Paquier (#1)
Re: Fix some error handling for read() and errno

Hello.

At Sun, 20 May 2018 09:05:22 +0900, Michael Paquier <michael@paquier.xyz> wrote in <20180520000522.GB1603@paquier.xyz>

Hi all,

This is basically a new thread after what has been discussed for
pg_controldata with its error handling for read():
/messages/by-id/CABUevEx8ZRV5Ut_FvP2etXiQppx3xVzm7oOaV3AcdHxX81Yt8Q@mail.gmail.com

While reviewing the core code, I have noticed similar weird error
handling for read(). At the same time, some of those places may use an
incorrect errno, as an error is invoked using an errno which may be
overwritten by another system call. I found a funny one in slru.c,
for which I have added a note in the patch. I don't think that this is
worth addressing with more facility, thoughts are welcome.

Attached is a patch addressing the issues I found.

I see the same issue in snapbuild.c(4 places).

| readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
| pgstat_report_wait_end();
| if (readBytes != SnapBuildOnDiskConstantSize)
| {
| CloseTransientFile(fd);
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\", read %d of %d: %m",
| path, readBytes, (int) SnapBuildOnDiskConstantSize)));
| }

and walsender.c (2 places)

| if (nread <= 0)
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\": %m",
| path)));

and pg_receivewal.c

| if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
| {
| fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
| progname, fullpath, strerror(errno));

pg_waldump.c

| if (readbytes <= 0)
...
| fatal_error("could not read from log file %s, offset %u, length %d: %s",
| fname, sendOff, segbytes, strerror(err));

A bit differnt issue, but in pg_waldump.c, search_directory can
check uninitialized errno when read returns a non-zero value.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#3Michael Paquier
michael@paquier.xyz
In reply to: Kyotaro HORIGUCHI (#2)
1 attachment(s)
Re: Fix some error handling for read() and errno

On Tue, May 22, 2018 at 04:51:00PM +0900, Kyotaro HORIGUCHI wrote:

I see the same issue in snapbuild.c(4 places).

| readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
| pgstat_report_wait_end();
| if (readBytes != SnapBuildOnDiskConstantSize)
| {
| CloseTransientFile(fd);
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\", read %d of %d: %m",
| path, readBytes, (int) SnapBuildOnDiskConstantSize)));
| }

Four times the same pattern, which also bloat errno when closing the
file descriptor. I did not catch those.

and walsender.c (2 places)

| if (nread <= 0)
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\": %m",
| path)));

Those two ones I saw, but I was not sure if it is worth the complication
to error on an empty file. We could do something like the attached which
would be an improvement in readability?

and pg_receivewal.c

| if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
| {
| fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
| progname, fullpath, strerror(errno));

Okay.

pg_waldump.c

| if (readbytes <= 0)
...
| fatal_error("could not read from log file %s, offset %u, length %d: %s",
| fname, sendOff, segbytes, strerror(err));

A bit different issue, but in pg_waldump.c, search_directory can
check uninitialized errno when read returns a non-zero value.

Yeah, the error message could be improved as well if the result is an
empty file.

Updated patch is attached. Thanks for your review.
--
Michael

Attachments:

read-errno-handling-v2.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 87942b4cca..d487347cc6 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -683,6 +683,11 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
 	if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
 	{
+		/*
+		 * XXX: Note that this may actually report sucess if the number
+		 * of bytes read is positive, but lacking data so that errno is
+		 * not set.
+		 */
 		pgstat_report_wait_end();
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 65194db70e..52f634e706 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1272,15 +1273,28 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
+		int		save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
 		if (give_warnings)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+		{
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read two-phase state file \"%s\": %m",
+								path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read two-phase state file \"%s\": %d read, expected %zu",
+								path, r, stat.st_size)));
+		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..2f2102eb71 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3395,21 +3395,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int		r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("not enough data in file \"%s\": read %d, expected %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -11594,6 +11597,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11675,8 +11679,10 @@ retry:
 	if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 				(errcode_for_file_access(),
 				 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11685,16 +11691,28 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d, expected %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
+
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 963878a5d8..c39d023d22 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -697,9 +697,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %d read of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 4123cdebcf..f99a885c5d 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1708,11 +1708,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != SnapBuildOnDiskConstantSize)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1736,11 +1745,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sizeof(SnapBuild))
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1753,11 +1771,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1769,11 +1796,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 056628fe8e..cc64ff22ac 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1394,11 +1394,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1434,10 +1438,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bc..727c3acbcb 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read empty file \"%s\"",
+							path)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read any data from log segment %s, offset %u, length %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..c4839b11f6 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -659,11 +660,19 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read relation mapping file \"%s\": %m",
+							mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read relation mapping file \"%s\": %d read, expected %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 071b32d19d..ddc3e90e79 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,16 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr,
+							_("%s: could not read compressed file \"%s\": read %d out of %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..174ca7216d 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,17 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d, expected %d\n",
+					 fullpath, r, len);
+
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..4f27cfe42b 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d, expected %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..61464d44c5 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("not enough data in file \"%s\": %d read, %d expected",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read any data from log file %s, offset %u, length %d",
+							fname, sendOff, segbytes);
 		}
 
 		/* Update state for read */
#4Kyotaro HORIGUCHI
horiguchi.kyotaro@lab.ntt.co.jp
In reply to: Michael Paquier (#3)
Re: Fix some error handling for read() and errno

At Wed, 23 May 2018 09:00:40 +0900, Michael Paquier <michael@paquier.xyz> wrote in <20180523000040.GA3461@paquier.xyz>

On Tue, May 22, 2018 at 04:51:00PM +0900, Kyotaro HORIGUCHI wrote:

I see the same issue in snapbuild.c(4 places).

| readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
| pgstat_report_wait_end();
| if (readBytes != SnapBuildOnDiskConstantSize)
| {
| CloseTransientFile(fd);
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\", read %d of %d: %m",
| path, readBytes, (int) SnapBuildOnDiskConstantSize)));
| }

Four times the same pattern, which also bloat errno when closing the
file descriptor. I did not catch those.

and walsender.c (2 places)

| if (nread <= 0)
| ereport(ERROR,
| (errcode_for_file_access(),
| errmsg("could not read file \"%s\": %m",
| path)));

Those two ones I saw, but I was not sure if it is worth the complication
to error on an empty file. We could do something like the attached which
would be an improvement in readability?

The case is not of an empty file. read() reads 0 bytes without
error while lseek have told that the file has *more* data. I
don't think that can happen. How about just commenting with
something like the following?

nread = read(fd, rbuf, sizeof(rbuf));
/*
* errno is E_OK in the case where nread == 0, but that can
* scarecely happen so we don't separate the case.
*/
if (nread <= 0)
ereport(ERROR,

If we ereport(%m) for the nread == 0 case, we need to initialize
errno.

and pg_receivewal.c

| if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
| {
| fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
| progname, fullpath, strerror(errno));

Okay.

pg_waldump.c

| if (readbytes <= 0)
...
| fatal_error("could not read from log file %s, offset %u, length %d: %s",
| fname, sendOff, segbytes, strerror(err));

A bit different issue, but in pg_waldump.c, search_directory can
check uninitialized errno when read returns a non-zero value.

Yeah, the error message could be improved as well if the result is an
empty file.

Updated patch is attached. Thanks for your review.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#5Michael Paquier
michael@paquier.xyz
In reply to: Kyotaro HORIGUCHI (#4)
1 attachment(s)
Re: Fix some error handling for read() and errno

On Fri, May 25, 2018 at 01:19:58PM +0900, Kyotaro HORIGUCHI wrote:

The case is not of an empty file. read() reads 0 bytes without
error while lseek have told that the file has *more* data. I
don't think that can happen. How about just commenting with
something like the following?

Actually it can be useful to report that no data has been read and that
more data was expected, like that:
+       else if (nread == 0)
+           ereport(ERROR,
+                   (errmsg("no data read from file \"%s\": expected %zu more bytes",
+                           path, bytesleft)));
--
Michael

Attachments:

read-errno-handling-v3.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 87942b4cca..d487347cc6 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -683,6 +683,11 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
 	if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
 	{
+		/*
+		 * XXX: Note that this may actually report sucess if the number
+		 * of bytes read is positive, but lacking data so that errno is
+		 * not set.
+		 */
 		pgstat_report_wait_end();
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 65194db70e..52f634e706 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1272,15 +1273,28 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
+		int		save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
 		if (give_warnings)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+		{
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read two-phase state file \"%s\": %m",
+								path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read two-phase state file \"%s\": %d read, expected %zu",
+								path, r, stat.st_size)));
+		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..2f2102eb71 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3395,21 +3395,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int		r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("not enough data in file \"%s\": read %d, expected %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -11594,6 +11597,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11675,8 +11679,10 @@ retry:
 	if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 				(errcode_for_file_access(),
 				 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11685,16 +11691,28 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d, expected %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
+
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 963878a5d8..c39d023d22 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -697,9 +697,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %d read of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 4123cdebcf..f99a885c5d 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1708,11 +1708,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != SnapBuildOnDiskConstantSize)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1736,11 +1745,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sizeof(SnapBuild))
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1753,11 +1771,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1769,11 +1796,20 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int		save_errno = errno;
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 056628fe8e..cc64ff22ac 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1394,11 +1394,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1434,10 +1438,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bc..5003860673 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("no data read from file \"%s\": expected %zu more bytes",
+							path, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read any data from log segment %s, offset %u, length %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..c4839b11f6 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -659,11 +660,19 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read relation mapping file \"%s\": %m",
+							mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read relation mapping file \"%s\": %d read, expected %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 071b32d19d..ddc3e90e79 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,16 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr,
+							_("%s: could not read compressed file \"%s\": read %d out of %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..174ca7216d 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,17 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d, expected %d\n",
+					 fullpath, r, len);
+
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..4f27cfe42b 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d, expected %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..61464d44c5 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("not enough data in file \"%s\": %d read, %d expected",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read any data from log file %s, offset %u, length %d",
+							fname, sendOff, segbytes);
 		}
 
 		/* Update state for read */
#6Robbie Harwood
rharwood@redhat.com
In reply to: Michael Paquier (#5)
Re: Fix some error handling for read() and errno

Michael Paquier <michael@paquier.xyz> writes:

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 87942b4cca..d487347cc6 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -683,6 +683,11 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
{
+		/*
+		 * XXX: Note that this may actually report sucess if the number
+		 * of bytes read is positive, but lacking data so that errno is
+		 * not set.
+		 */
pgstat_report_wait_end();
slru_errcause = SLRU_READ_FAILED;
slru_errno = errno;

It might be less confusing to just set errno if it's not set already
(e.g., to EIO, or something). Up to you though - this is a bit of a
niche case.

The rest of the patch looks good to me.

Thanks,
--Robbie

#7Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Robbie Harwood (#6)
Re: Fix some error handling for read() and errno

On 2018-Jun-11, Robbie Harwood wrote:

It might be less confusing to just set errno if it's not set already
(e.g., to EIO, or something). Up to you though - this is a bit of a
niche case.

I think that would be very confusing, if we receive a report that really
is a short read but looks like EIO. I'm for keeping the code as
proposed.

As for the messages, I propose to make them more regular, i.e. always
use the wording "could not read from file "%s": read %d, expected %zu",
avoiding variations such as
not enough data in file \"%s\": %d read, %d expected"
could not read compressed file \"%s\": read %d out of %zu
could not read any data from log segment %s, offset %u, length %lu
and others that appear in other places. (In the last case, I even go as
far as proposing "read %d, expected %zu" where the the first %d is
constant zero. Extra points if the sprintf format specifiers are always
the same (say %zu) instead of, say, %d in odd places.

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#8Andres Freund
andres@anarazel.de
In reply to: Alvaro Herrera (#7)
Re: Fix some error handling for read() and errno

On 2018-06-11 18:11:05 -0400, Alvaro Herrera wrote:

As for the messages, I propose to make them more regular, i.e. always
use the wording "could not read from file "%s": read %d, expected %zu",
avoiding variations such as
not enough data in file \"%s\": %d read, %d expected"
could not read compressed file \"%s\": read %d out of %zu
could not read any data from log segment %s, offset %u, length %lu
and others that appear in other places. (In the last case, I even go as
far as proposing "read %d, expected %zu" where the the first %d is
constant zero. Extra points if the sprintf format specifiers are always
the same (say %zu) instead of, say, %d in odd places.

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

If we go there, why not wrap the read/write/etc calls into a wrapper,
including the error handling?

I'm not quite convinced that "relation mapping file" doesn't provide any
information. It's likley to be easier to search for than a specific
filename, particularly if there's oids or such in the name...

Greetings,

Andres Freund

#9Michael Paquier
michael@paquier.xyz
In reply to: Andres Freund (#8)
Re: Fix some error handling for read() and errno

On Mon, Jun 11, 2018 at 03:17:15PM -0700, Andres Freund wrote:

On 2018-06-11 18:11:05 -0400, Alvaro Herrera wrote:

As for the messages, I propose to make them more regular, i.e. always
use the wording "could not read from file "%s": read %d, expected %zu",
avoiding variations such as
not enough data in file \"%s\": %d read, %d expected"
could not read compressed file \"%s\": read %d out of %zu
could not read any data from log segment %s, offset %u, length %lu
and others that appear in other places. (In the last case, I even go as
far as proposing "read %d, expected %zu" where the the first %d is
constant zero. Extra points if the sprintf format specifiers are always
the same (say %zu) instead of, say, %d in odd places.

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

If we go there, why not wrap the read/write/etc calls into a wrapper,
including the error handling?

On this thread have been spotted 17 code paths involved: 13 for the
backend and 4 for src/bin. For the frontend part the error handling of
each of of them is slightly different (pg_rewind uses pg_fatal,
pg_waldump uses fatal_error) so I don't think that for this part another
wrapper would bring more clarity. For the backend part, 7 are working
on transient files and 6 are not, which would make the wrapper a tad
more complex if we would need for example a set of flags in input to
control if the fd passed down by the caller should use
CloseTransientFile() before emitting an error or not. If the balance
was way more in favor of one or the other though...

I'm not quite convinced that "relation mapping file" doesn't provide any
information. It's likley to be easier to search for than a specific
filename, particularly if there's oids or such in the name...

Agreed. I also quite like the message mentioning directly 2PC files as
well. I think that we could gain by making all end messages more
consistent, as the markers used and the style of each message is
slightly different, so I would suggest something like that instead to
gain in consistency:
if (readBytes < 0)
ereport(elevel, "could not blah: %m");
else
ereport(elevel, "could not blah: %d read, expected %zu");

My point is that if we use the same markers and the same end messages,
then those are easier to grep for, and callers are still free to provide
the head of error messages the way they want depending on the situation.
--
Michael

#10Michael Paquier
michael@paquier.xyz
In reply to: Michael Paquier (#9)
1 attachment(s)
Re: Fix some error handling for read() and errno

On Tue, Jun 12, 2018 at 01:19:54PM +0900, Michael Paquier wrote:

Agreed. I also quite like the message mentioning directly 2PC files as
well. I think that we could gain by making all end messages more
consistent, as the markers used and the style of each message is
slightly different, so I would suggest something like that instead to
gain in consistency:
if (readBytes < 0)
ereport(elevel, "could not blah: %m");
else
ereport(elevel, "could not blah: %d read, expected %zu");

My point is that if we use the same markers and the same end messages,
then those are easier to grep for, and callers are still free to provide
the head of error messages the way they want depending on the situation.

I have dug again into this stuff, and I have finished with the attached
which uses mainly "could not read file %s: read %d bytes, expected
%zu". The markers are harder to make consistent without being more
invasive so I stopped on that.

There is also this bit in slru.c which I'd like to discuss:
+       /*
+        * Note that this would report success if the number of bytes read is
+        * positive, but lacking data so that errno is not set, which would be
+        * confusing, so set errno to EIO in this case.
+        */
+       if (errno == 0)
+           errno = EIO;
Please note that I don't necessarily propose to add this in the final
patch, and I think that at least an XXX comment should be added here to
mention that errno may not be set.

Thoughts?
--
Michael

Attachments:

read-errno-handling-v4.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 1132eef038..805aa521cf 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -684,6 +684,14 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
 	{
 		pgstat_report_wait_end();
+
+		/*
+		 * Note that this would report success if the number of bytes read is
+		 * positive, but lacking data so that errno is not set, which would be
+		 * confusing, so set errno to EIO in this case.
+		 */
+		if (errno == 0)
+			errno = EIO;
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
 		CloseTransientFile(fd);
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 65194db70e..95ab03628f 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1272,15 +1273,28 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
+		int			save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
 		if (give_warnings)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+		{
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read two-phase state file \"%s\": %m",
+								path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read two-phase state file \"%s\": read %d bytes, expected %zu",
+								path, r, stat.st_size)));
+		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..471ef87842 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3395,21 +3395,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int			r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("could not read file \"%s\": read %d bytes, expected %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -4509,7 +4512,8 @@ ReadControlFile(void)
 					 errmsg("could not read from control file: %m")));
 		else
 			ereport(PANIC,
-					 (errmsg("could not read from control file: read %d bytes, expected %d", r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read from control file: read %d bytes, expected %zu",
+							r, sizeof(ControlFileData))));
 	}
 	pgstat_report_wait_end();
 
@@ -11594,6 +11598,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11675,8 +11680,10 @@ retry:
 	if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 				(errcode_for_file_access(),
 				 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11685,16 +11692,28 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d bytes, expected %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
+
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 963878a5d8..053c13c43e 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -697,9 +697,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 4123cdebcf..448f6a87c5 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1708,11 +1708,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != SnapBuildOnDiskConstantSize)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1736,11 +1746,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sizeof(SnapBuild))
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1753,11 +1773,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1769,11 +1799,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index e8b76b2f20..54e02f9963 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1401,11 +1401,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1441,10 +1445,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bc..15508ae633 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, nread, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read from log segment %s, offset %u: read %d bytes, expected %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, readbytes, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..f4fd0f1819 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -659,11 +660,19 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read relation mapping file \"%s\": %m",
+							mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read relation mapping file \"%s\": read %d bytes, expected %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 071b32d19d..fc4e2d0492 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,15 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": read %d bytes, expected %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..895eb50cc0 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,16 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("not enough data in file \"%s\": read %d bytes, expected %d\n",
+					 fullpath, r, len);
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..de3112a1bd 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d bytes, expected %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..ef142e8095 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("could not read file \"%s\": read %d bytes, %d expected",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read from log file %s, offset %u: read %d bytes, expected %d",
+							fname, sendOff, readbytes, segbytes);
 		}
 
 		/* Update state for read */
#11Robert Haas
robertmhaas@gmail.com
In reply to: Alvaro Herrera (#7)
Re: Fix some error handling for read() and errno

On Mon, Jun 11, 2018 at 6:11 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

+1. I think those things are often hard to phrase even in English.
It makes the messages long, awkward, and almost invariably the style
differs from one message to the next depending on the author and how
easy it is to describe the type of file involved.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#12Michael Paquier
michael@paquier.xyz
In reply to: Robert Haas (#11)
Re: Fix some error handling for read() and errno

On Thu, Jun 14, 2018 at 09:50:33AM -0400, Robert Haas wrote:

On Mon, Jun 11, 2018 at 6:11 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

+1. I think those things are often hard to phrase even in English.
It makes the messages long, awkward, and almost invariably the style
differs from one message to the next depending on the author and how
easy it is to describe the type of file involved.

Okay, so this makes two votes in favor of keeping the messages simple
without context, shaped like "could not read file %s", with Robert and
Alvaro, and two votes for keeping some context with Andres and I.
Anybody else has an opinion to offer?

Please note that I think that some messages should keep some context
anyway, for example the WAL-related information is useful.  An example
is this one where the offset is good to know about:
+   ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+           (errmsg("could not read from log segment %s, offset %u: read %d bytes, expected %d",
+                   fname, readOff, r, XLOG_BLCKSZ)));
--
Michael
#13Magnus Hagander
magnus@hagander.net
In reply to: Michael Paquier (#12)
Re: Fix some error handling for read() and errno

On Mon, Jun 18, 2018 at 6:42 AM, Michael Paquier <michael@paquier.xyz>
wrote:

On Thu, Jun 14, 2018 at 09:50:33AM -0400, Robert Haas wrote:

On Mon, Jun 11, 2018 at 6:11 PM, Alvaro Herrera
<alvherre@2ndquadrant.com> wrote:

I would go as far as suggesting to remove qualifiers that indicate what
the file is for (such as "relation mapping file"); relying on the path
as indicator of what's going wrong should be sufficient, since it's an
error that affects internals anyway, not anything that users can do much
about. Keep variations to a minimum, to ease translator's work;
sometimes it's hard enough to come up with good translations for things
like "relation mapping file" in the first place, and they don't help the
end user.

+1. I think those things are often hard to phrase even in English.
It makes the messages long, awkward, and almost invariably the style
differs from one message to the next depending on the author and how
easy it is to describe the type of file involved.

Okay, so this makes two votes in favor of keeping the messages simple
without context, shaped like "could not read file %s", with Robert and
Alvaro, and two votes for keeping some context with Andres and I.
Anybody else has an opinion to offer?

Count me in with Robert and Alvaro with a +0.5 :)

Please note that I think that some messages should keep some context

anyway, for example the WAL-related information is useful.  An example
is this one where the offset is good to know about:
+   ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+           (errmsg("could not read from log segment %s, offset %u: read
%d bytes, expected %d",
+                   fname, readOff, r, XLOG_BLCKSZ)));

Yeah, I think you'd need to make a call for the individual message to see
how much it helps. In this one the context definitely does, in some others
it doesn't.

--
Magnus Hagander
Me: https://www.hagander.net/ <http://www.hagander.net/&gt;
Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/&gt;

#14Michael Paquier
michael@paquier.xyz
In reply to: Magnus Hagander (#13)
1 attachment(s)
Re: Fix some error handling for read() and errno

On Wed, Jun 20, 2018 at 02:52:23PM +0200, Magnus Hagander wrote:

On Mon, Jun 18, 2018 at 6:42 AM, Michael Paquier <michael@paquier.xyz>
wrote:

Okay, so this makes two votes in favor of keeping the messages simple
without context, shaped like "could not read file %s", with Robert and
Alvaro, and two votes for keeping some context with Andres and I.
Anybody else has an opinion to offer?

Count me in with Robert and Alvaro with a +0.5 :)

Thanks for your opinion.

Please note that I think that some messages should keep some context
anyway, for example the WAL-related information is useful.  An example
is this one where the offset is good to know about:
+   ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+           (errmsg("could not read from log segment %s, offset %u: read
%d bytes, expected %d",
+                   fname, readOff, r, XLOG_BLCKSZ)));

Yeah, I think you'd need to make a call for the individual message to see
how much it helps. In this one the context definitely does, in some others
it doesn't.

Sure. I have looked at that and I am finishing with the updated version
attached.

I removed the portion about slru in the code, but I would like to add at
least a mention about it in the commit message. The simplifications are
also applied for the control file messages you changed as well in
cfb758b. Also in ReadControlFile(), we use "could not open control
file", so for consistency this should be replaced with a more simple
message. I noticed as well that the 2PC file handling loses errno a
couple of times, and that we had better keep the messages also
consistent if we do the simplification move... relmapper.c also gains
simplifications.

All the incorrect errno handling I found should be back-patched in my
opinion as we did so in the past with 2a11b188 for example. I am not
really willing to bother about the error message improvements on
anything else than HEAD (not v11, but v12), but... Opinions about all
that?
--
Michael

Attachments:

read-errno-handling-v5.patchtext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 65194db70e..f72c149d46 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1228,8 +1229,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		if (give_warnings)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not open two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not open file \"%s\": %m", path)));
 		return NULL;
 	}
 
@@ -1245,8 +1245,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		if (give_warnings)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not stat two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not stat file \"%s\": %m", path)));
 		return NULL;
 	}
 
@@ -1272,15 +1271,27 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
+		int			save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
 		if (give_warnings)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+		{
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+								path, r, stat.st_size)));
+		}
 		pfree(buf);
 		return NULL;
 	}
@@ -1627,8 +1638,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
 		if (errno != ENOENT || giveWarning)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not remove two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not remove file \"%s\": %m", path)));
 }
 
 /*
@@ -1656,26 +1666,31 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not recreate two-phase state file \"%s\": %m",
-						path)));
+				 errmsg("could not recreate file \"%s\": %m", path)));
 
 	/* Write content and CRC */
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
 	if (write(fd, content, len) != len)
 	{
+		int			save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
+		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
 	{
+		int			save_errno = errno;
+
 		pgstat_report_wait_end();
 		CloseTransientFile(fd);
+		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
@@ -1686,17 +1701,20 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
 	if (pg_fsync(fd) != 0)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
+		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync two-phase state file: %m")));
+				 errmsg("could not fsync file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close two-phase state file: %m")));
+				 errmsg("could not close file \"%s\": %m", path)));
 }
 
 /*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..0123f93705 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3395,21 +3395,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int			r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("could not read file \"%s\": read %d bytes, expected %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -4496,7 +4499,7 @@ ReadControlFile(void)
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg("could not open control file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						XLOG_CONTROL_FILE)));
 
 	pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
@@ -4506,10 +4509,12 @@ ReadControlFile(void)
 		if (r < 0)
 			ereport(PANIC,
 					(errcode_for_file_access(),
-					 errmsg("could not read from control file: %m")));
+					 errmsg("could not read file \"%s\": %m",
+							XLOG_CONTROL_FILE)));
 		else
 			ereport(PANIC,
-					 (errmsg("could not read from control file: read %d bytes, expected %d", r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
 	}
 	pgstat_report_wait_end();
 
@@ -11594,6 +11599,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11675,8 +11681,10 @@ retry:
 	if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 				(errcode_for_file_access(),
 				 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11685,16 +11693,28 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
+		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d bytes, expected %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
+
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 963878a5d8..053c13c43e 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -697,9 +697,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 4123cdebcf..448f6a87c5 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1708,11 +1708,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != SnapBuildOnDiskConstantSize)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1736,11 +1746,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sizeof(SnapBuild))
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1753,11 +1773,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1769,11 +1799,21 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	pgstat_report_wait_end();
 	if (readBytes != sz)
 	{
+		int			save_errno = errno;
+
 		CloseTransientFile(fd);
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index e8b76b2f20..54e02f9963 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1401,11 +1401,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1441,10 +1445,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bc..15508ae633 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							path, nread, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read from log segment %s, offset %u: read %d bytes, expected %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, readbytes, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..3f97c8d597 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -648,7 +649,7 @@ load_relmap_file(bool shared)
 	if (fd < 0)
 		ereport(FATAL,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	/*
@@ -659,11 +660,18 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read file \"%s\": read %d bytes, expected %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
@@ -748,7 +756,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	if (write_wal)
@@ -782,7 +790,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 			errno = ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write to relation mapping file \"%s\": %m",
+				 errmsg("could not write file \"%s\": %m",
 						mapfilename)));
 	}
 	pgstat_report_wait_end();
@@ -797,14 +805,14 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (pg_fsync(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync relation mapping file \"%s\": %m",
+				 errmsg("could not fsync file \"%s\": %m",
 						mapfilename)));
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close relation mapping file \"%s\": %m",
+				 errmsg("could not close file \"%s\": %m",
 						mapfilename)));
 
 	/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 071b32d19d..fc4e2d0492 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,15 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": read %d bytes, expected %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..20bbf32705 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,16 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d bytes, expected %d\n",
+					 fullpath, r, len);
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..de3112a1bd 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d bytes, expected %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..ef142e8095 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("could not read file \"%s\": read %d bytes, %d expected",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read from log file %s, offset %u: read %d bytes, expected %d",
+							fname, sendOff, readbytes, segbytes);
 		}
 
 		/* Update state for read */
#15Robert Haas
robertmhaas@gmail.com
In reply to: Michael Paquier (#14)
Re: Fix some error handling for read() and errno

On Thu, Jun 21, 2018 at 2:32 AM, Michael Paquier <michael@paquier.xyz> wrote:

Sure. I have looked at that and I am finishing with the updated version
attached.

I removed the portion about slru in the code, but I would like to add at
least a mention about it in the commit message. The simplifications are
also applied for the control file messages you changed as well in
cfb758b. Also in ReadControlFile(), we use "could not open control
file", so for consistency this should be replaced with a more simple
message. I noticed as well that the 2PC file handling loses errno a
couple of times, and that we had better keep the messages also
consistent if we do the simplification move... relmapper.c also gains
simplifications.

All the incorrect errno handling I found should be back-patched in my
opinion as we did so in the past with 2a11b188 for example. I am not
really willing to bother about the error message improvements on
anything else than HEAD (not v11, but v12), but... Opinions about all
that?

I think this should be split into two patches. Fixing failure to save
and restore errno is a different issue from cleaning up short write
messages. I agree that the former should be back-patched and the
latter not.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#16Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Robert Haas (#15)
Re: Fix some error handling for read() and errno

On 2018-Jun-22, Robert Haas wrote:

I think this should be split into two patches. Fixing failure to save
and restore errno is a different issue from cleaning up short write
messages. I agree that the former should be back-patched and the
latter not.

Hmm, yeah, good thought, +1.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#17Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#16)
Re: Fix some error handling for read() and errno

On Fri, Jun 22, 2018 at 09:51:30AM -0400, Alvaro Herrera wrote:

On 2018-Jun-22, Robert Haas wrote:

I think this should be split into two patches. Fixing failure to save
and restore errno is a different issue from cleaning up short write
messages. I agree that the former should be back-patched and the
latter not.

Hmm, yeah, good thought, +1.

That's exactly why I have started this thread so as both problems are
addressed separately:
/messages/by-id/20180622061535.GD5215@paquier.xyz
And back-patching the errno patch while only bothering about messages on
HEAD matches also what I got in mind. I'll come back to this thread
once the errno issues are all addressed.
--
Michael

#18Michael Paquier
michael@paquier.xyz
In reply to: Michael Paquier (#17)
2 attachment(s)
Re: Fix some error handling for read() and errno

On Sat, Jun 23, 2018 at 08:48:03AM +0900, Michael Paquier wrote:

That's exactly why I have started this thread so as both problems are
addressed separately:
/messages/by-id/20180622061535.GD5215@paquier.xyz
And back-patching the errno patch while only bothering about messages on
HEAD matches also what I got in mind. I'll come back to this thread
once the errno issues are all addressed.

As this one is done, I have been looking at that this thread again.
Peter Eisentraut has pushed as e5d11b9 something which does not need to
worry about pluralization of error messages. So I have moved to this
message style for all messages. All of this is done as 0001.

I have been thinking as well about a common interface which could be
used to read/write/fsync transient files:
void WriteTransientFile(int fd, char *buf, Size count, int elevel,
const char *filename, uint32 wait_event_info);
bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
const char *filename, uint32 wait_event_info);
void SyncTransientFile(int fd, int elevel, const char *filename
uint32 wait_event_info);

There are rather equivalent things with FileRead and FileWrite but the
purpose is different as well.

If you look at 0002, this shaves a bit of code:
6 files changed, 128 insertions(+), 200 deletions(-)

There are also a couple of advantages here:
- Centralize errno handling for transient files with ENOSPC for write(2)
and read count for read(2)
- Wait events have to be defined, so those would unlikely get forgotten
in the future.
- Error handling for CloseTransientFile in code paths is centralized.

ReadTransientFile could be redefined to return the number of bytes read
as result with caller checking for errno, but that feels a bit duplicate
work for twophase.c. WriteTransientFile and SyncTransientFile could
also have the same treatment for consistency but they would not really
be used now. Do you guys think that this is worth pursuing? Merging
0001 and 0002 together may make sense then.
--
Michael

Attachments:

0001-Rework-error-messages-around-file-handling.patchtext/x-diff; charset=us-asciiDownload
From 041068b821dd555e437f77a99e95795eceff3189 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 25 Jun 2018 14:37:18 +0900
Subject: [PATCH 1/2] Rework error messages around file handling

Some error messages related to file handling are using the code path
context to define their state.  For example, 2PC-related errors are
referring to "two-phase status files", or "relation mapping file" is
used for catalog-to-filenode mapping, however those prove to be
difficult to translate, and are not more helpful than just referring to
the path of the file being manipulated.  So simplify all those error
messages by just referring to files with their path used.  In some
cases, like the manipulation of WAL segments, the context is helpful so
those are kept.

Calls to the system function read() have also been rather inconsistent
with their error handling sometimes not reporting the number of bytes
read, and some other code paths trying to use an errno which has not
been set.  The in-core functions are using a more consistent pattern
with this patch, which checks for both errno if set or if an
inconsistent read is happening.

So as to care about pluralization when reading an unexpected number of
bytes, "could not read: read %d of %d" is used as error message.

Author: Michael Paquier
Discussion: https://postgr.es/m/20180622234803.GA1134@paquier.xyz
---
 src/backend/access/transam/twophase.c       | 40 ++++++------
 src/backend/access/transam/xlog.c           | 40 ++++++++----
 src/backend/replication/logical/origin.c    | 13 +++-
 src/backend/replication/logical/snapbuild.c | 68 +++++++++++++++------
 src/backend/replication/slot.c              | 26 +++++---
 src/backend/replication/walsender.c         | 16 ++++-
 src/backend/utils/cache/relmapper.c         | 28 ++++++---
 src/bin/pg_basebackup/pg_receivewal.c       | 12 +++-
 src/bin/pg_rewind/file_ops.c                | 14 ++++-
 src/bin/pg_rewind/parsexlog.c               | 14 ++++-
 src/bin/pg_waldump/pg_waldump.c             | 17 ++++--
 11 files changed, 200 insertions(+), 88 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index a9ef1b3d73..10c1e31c0f 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1228,8 +1229,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		if (give_warnings)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not open two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not open file \"%s\": %m", path)));
 		return NULL;
 	}
 
@@ -1249,8 +1249,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 			errno = save_errno;
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not stat two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not stat file \"%s\": %m", path)));
 		}
 		return NULL;
 	}
@@ -1277,7 +1276,8 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
 		int			save_errno = errno;
 
@@ -1285,11 +1285,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		CloseTransientFile(fd);
 		if (give_warnings)
 		{
-			errno = save_errno;
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read file \"%s\": read %d of %zu",
+								path, r, stat.st_size)));
 		}
 		pfree(buf);
 		return NULL;
@@ -1637,8 +1643,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
 		if (errno != ENOENT || giveWarning)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not remove two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not remove file \"%s\": %m", path)));
 }
 
 /*
@@ -1666,8 +1671,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not recreate two-phase state file \"%s\": %m",
-						path)));
+				 errmsg("could not recreate file \"%s\": %m", path)));
 
 	/* Write content and CRC */
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
@@ -1682,7 +1686,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
 	{
@@ -1695,7 +1699,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
@@ -1712,14 +1716,14 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync two-phase state file: %m")));
+				 errmsg("could not fsync file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close two-phase state file: %m")));
+				 errmsg("could not close file \"%s\": %m", path)));
 }
 
 /*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1a419aa49b..5bb19cef31 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3398,21 +3398,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int			r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("could not read file \"%s\": read %d of %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -4499,7 +4502,7 @@ ReadControlFile(void)
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg("could not open control file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						XLOG_CONTROL_FILE)));
 
 	pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
@@ -4509,10 +4512,12 @@ ReadControlFile(void)
 		if (r < 0)
 			ereport(PANIC,
 					(errcode_for_file_access(),
-					 errmsg("could not read from control file: %m")));
+					 errmsg("could not read file \"%s\": %m",
+							XLOG_CONTROL_FILE)));
 		else
 			ereport(PANIC,
-					 (errmsg("could not read from control file: read %d bytes, expected %d", r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
 	}
 	pgstat_report_wait_end();
 
@@ -11597,6 +11602,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11690,18 +11696,26 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
 		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		errno = save_errno;
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d of %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 3d3f6dff1b..841e24c03d 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -712,9 +712,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 2c4a1bab4b..da97efc305 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1719,11 +1719,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1750,11 +1757,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1770,11 +1784,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1789,11 +1810,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index f5927b4d1d..ddd91ef886 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1406,11 +1406,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1446,10 +1450,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bc..9b87c7ca41 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, nread, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read from log segment %s, offset %u: read %d of %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, readbytes, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..2d31f9f912 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -648,7 +649,7 @@ load_relmap_file(bool shared)
 	if (fd < 0)
 		ereport(FATAL,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	/*
@@ -659,11 +660,18 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
@@ -748,7 +756,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	if (write_wal)
@@ -782,7 +790,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 			errno = ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write to relation mapping file \"%s\": %m",
+				 errmsg("could not write file \"%s\": %m",
 						mapfilename)));
 	}
 	pgstat_report_wait_end();
@@ -797,14 +805,14 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (pg_fsync(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync relation mapping file \"%s\": %m",
+				 errmsg("could not fsync file \"%s\": %m",
 						mapfilename)));
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close relation mapping file \"%s\": %m",
+				 errmsg("could not close file \"%s\": %m",
 						mapfilename)));
 
 	/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 071b32d19d..bdea9a85ff 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,15 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": read %d of %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..da2bf449e4 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,16 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d of %d\n",
+					 fullpath, r, len);
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b4c1f827a6..6b3a337509 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd, WalSegSz);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d of %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5c4f38e597..147929f52b 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("could not read file \"%s\": read %d of %d",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read from log file %s, offset %u: read %d of %d",
+							fname, sendOff, readbytes, segbytes);
 		}
 
 		/* Update state for read */
-- 
2.18.0

0002-Add-interface-to-read-write-fsync-with-transient-fil.patchtext/x-diff; charset=us-asciiDownload
From 057d3ec6b42c1c5a5b332b82af89e532ac2893f3 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 25 Jun 2018 16:15:03 +0900
Subject: [PATCH 2/2] Add interface to read/write/fsync with transient files

The following set of routines gets added for the manipulation of
transient files:
void WriteTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
void SyncTransientFile(int fd, int elevel, const char *filename,
    uint32 wait_event_info);

This simplifies code related to replication slots, 2PC files, relation
mapper files and snapshot builds:
- Centralize errno handling for transient files with ENOSPC for write(2)
and read count for read(2)
- Wait events have to be defined, so those would unlikely get forgotten
in the future.
- Error handling for CloseTransientFile in code paths is centralized.
---
 src/backend/access/transam/twophase.c       |  25 +----
 src/backend/replication/logical/snapbuild.c | 110 ++------------------
 src/backend/replication/slot.c              |  46 +-------
 src/backend/storage/file/fd.c               |  97 ++++++++++++++++-
 src/backend/utils/cache/relmapper.c         |  40 ++-----
 src/include/storage/fd.h                    |  10 +-
 6 files changed, 128 insertions(+), 200 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 10c1e31c0f..61b3780119 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,7 +1219,6 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
-	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1275,28 +1274,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	 */
 	buf = (char *) palloc(stat.st_size);
 
-	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	r = read(fd, buf, stat.st_size);
-	if (r != stat.st_size)
+	if (!ReadTransientFile(fd, buf, stat.st_size,
+						   give_warnings ? WARNING : DEBUG3, path,
+						   WAIT_EVENT_TWOPHASE_FILE_READ))
 	{
-		int			save_errno = errno;
-
-		pgstat_report_wait_end();
-		CloseTransientFile(fd);
-		if (give_warnings)
-		{
-			if (r < 0)
-			{
-				errno = save_errno;
-				ereport(WARNING,
-						(errcode_for_file_access(),
-						 errmsg("could not read file \"%s\": %m", path)));
-			}
-			else
-				ereport(WARNING,
-						(errmsg("could not read file \"%s\": read %d of %zu",
-								path, r, stat.st_size)));
-		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index da97efc305..4ea8be6c34 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1602,20 +1602,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
 		ereport(ERROR,
 				(errmsg("could not open file \"%s\": %m", path)));
 
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
-	if ((write(fd, ondisk, needed_length)) != needed_length)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		/* if write didn't set errno, assume problem is no disk space */
-		errno = save_errno ? save_errno : ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write to file \"%s\": %m", tmppath)));
-	}
-	pgstat_report_wait_end();
+	WriteTransientFile(fd, (char *) ondisk, needed_length, ERROR, tmppath,
+					   WAIT_EVENT_SNAPBUILD_WRITE);
 
 	/*
 	 * fsync the file before renaming so that even if we crash after this we
@@ -1679,7 +1667,6 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	int			fd;
 	char		path[MAXPGPATH];
 	Size		sz;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no point in loading a snapshot if we're already there */
@@ -1709,29 +1696,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	fsync_fname(path, false);
 	fsync_fname("pg_logical/snapshots", true);
 
-
 	/* read statically sized portion of snapshot */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != SnapBuildOnDiskConstantSize)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, SnapBuildOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk, SnapBuildOnDiskConstantSize,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
 		ereport(ERROR,
@@ -1749,80 +1716,23 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 				SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
 
 	/* read SnapBuild */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
-	pgstat_report_wait_end();
-	if (readBytes != sizeof(SnapBuild))
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sizeof(SnapBuild))));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk.builder, sizeof(SnapBuild),
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
 	/* restore running xacts (dead, but kept for backward compat) */
 	sz = sizeof(TransactionId) * ondisk.builder.was_running.was_xcnt_space;
 	ondisk.builder.was_running.was_xip =
 		MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.was_running.was_xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
 	/* restore committed xacts information */
 	sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
 	ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.committed.xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.committed.xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index ddd91ef886..3f06aac2cd 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1346,7 +1346,6 @@ RestoreSlotFromDisk(const char *name)
 	char		path[MAXPGPATH + 22];
 	int			fd;
 	bool		restored = false;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no need to lock here, no concurrent access allowed yet */
@@ -1397,25 +1396,8 @@ RestoreSlotFromDisk(const char *name)
 	END_CRIT_SECTION();
 
 	/* read part of statefile that's guaranteed to be version independent */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != ReplicationSlotOnDiskConstantSize)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %u",
-							path, readBytes,
-							(uint32) ReplicationSlotOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &cp, ReplicationSlotOnDiskConstantSize,
+							 PANIC, path, WAIT_EVENT_REPLICATION_SLOT_READ);
 
 	/* verify magic */
 	if (cp.magic != SLOT_MAGIC)
@@ -1439,27 +1421,9 @@ RestoreSlotFromDisk(const char *name)
 						path, cp.length)));
 
 	/* Now that we know the size, read the entire file */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd,
-					 (char *) &cp + ReplicationSlotOnDiskConstantSize,
-					 cp.length);
-	pgstat_report_wait_end();
-	if (readBytes != cp.length)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %u",
-							path, readBytes, cp.length)));
-	}
-
+	(void) ReadTransientFile(fd, (char *) &cp + ReplicationSlotOnDiskConstantSize,
+							 cp.length, PANIC, path,
+							 WAIT_EVENT_REPLICATION_SLOT_READ);
 	CloseTransientFile(fd);
 
 	/* now verify the CRC */
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 8dd51f1767..fba7774ddc 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -47,8 +47,9 @@
  * ownership mechanism that provides automatic cleanup for shared files when
  * the last of a group of backends detaches.
  *
- * AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are
- * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively.
+ * AllocateFile, AllocateDir, OpenPipeStream, OpenTransientFile,
+ * WriteTransientFile and ReadTransientFile are wrappers around fopen(3),
+ * opendir(3), popen(3), open(2), write(2) and read(2) respectively.
  * They behave like the corresponding native functions, except that the handle
  * is registered with the current subtransaction, and will be automatically
  * closed at abort. These are intended mainly for short operations like
@@ -2480,6 +2481,98 @@ TryAgain:
 	return NULL;
 }
 
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to write(2).
+ */
+void
+WriteTransientFile(int fd, char *buf, Size count, int elevel,
+				   const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = write(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+
+		/* if write didn't set errno, assume problem is no disk space */
+		errno = save_errno ? save_errno : ENOSPC;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", filename)));
+	}
+}
+
+/*
+ * Read from a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to read(2).  Returns true on
+ * success and false on failure.
+ */
+bool
+ReadTransientFile(int fd, char *buf, Size count, int elevel,
+				  const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = read(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		CloseTransientFile(fd);
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", filename)));
+		}
+		else
+			ereport(elevel,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							filename, r, count)));
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to fsync(2).
+ */
+void
+SyncTransientFile(int fd, int elevel, const char *filename,
+				  uint32 wait_event_info)
+{
+	int			status;
+
+	pgstat_report_wait_start(wait_event_info);
+	status = pg_fsync(fd);
+	pgstat_report_wait_end();
+
+	if (status != 0)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+		errno = save_errno;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", filename)));
+	}
+}
+
 /*
  * Free an AllocateDesc of any type.
  *
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 2d31f9f912..e6eff58d40 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,7 +629,6 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
-	int			r;
 
 	if (shared)
 	{
@@ -659,20 +658,8 @@ load_relmap_file(bool shared)
 	 * look, the sinval signaling mechanism will make us re-read it before we
 	 * are able to access any relation that's affected by the change.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	r = read(fd, map, sizeof(RelMapFile));
-	if (r != sizeof(RelMapFile))
-	{
-		if (r < 0)
-			ereport(FATAL,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", mapfilename)));
-		else
-			ereport(FATAL,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							mapfilename, r, sizeof(RelMapFile))));
-	}
-	pgstat_report_wait_end();
+	(void) ReadTransientFile(fd, (char *) map, sizeof(RelMapFile), FATAL,
+							 mapfilename, WAIT_EVENT_RELATION_MAP_READ);
 
 	CloseTransientFile(fd);
 
@@ -782,18 +769,9 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	}
 
 	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
-	if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
-	{
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write file \"%s\": %m",
-						mapfilename)));
-	}
-	pgstat_report_wait_end();
+
+	WriteTransientFile(fd, (char *) newmap, sizeof(RelMapFile), ERROR,
+					   mapfilename, WAIT_EVENT_RELATION_MAP_WRITE);
 
 	/*
 	 * We choose to fsync the data to disk before considering the task done.
@@ -801,13 +779,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	 * issue, but it would complicate checkpointing --- see notes for
 	 * CheckPointRelationMap.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
-	if (pg_fsync(fd) != 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not fsync file \"%s\": %m",
-						mapfilename)));
-	pgstat_report_wait_end();
+	SyncTransientFile(fd, ERROR, mapfilename, WAIT_EVENT_RELATION_MAP_SYNC);
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 8e7c9728f4..4309d9da95 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -34,7 +34,9 @@
  *
  * Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
  * open directories (DIR*), and OpenTransientFile/CloseTransient File for an
- * unbuffered file descriptor.
+ * unbuffered file descriptor.  WriteTransientFile should be used instead
+ * of write(2), ReadTransientFile instead of read(2), and SyncTransientFile
+ * instead of fsync(2).
  */
 #ifndef FD_H
 #define FD_H
@@ -105,6 +107,12 @@ extern int	FreeDir(DIR *dir);
 /* Operations to allow use of a plain kernel FD, with automatic cleanup */
 extern int	OpenTransientFile(const char *fileName, int fileFlags);
 extern int	OpenTransientFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
+extern void WriteTransientFile(int fd, char *buf, Size count, int elevel,
+							   const char *filename, uint32 wait_event_info);
+extern bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
+							  const char *filename, uint32 wait_event_info);
+extern void SyncTransientFile(int fd, int elevel,  const char *filename,
+							  uint32 wait_event_info);
 extern int	CloseTransientFile(int fd);
 
 /* If you've really really gotta have a plain kernel FD, use this */
-- 
2.18.0

#19Michael Paquier
michael@paquier.xyz
In reply to: Michael Paquier (#18)
2 attachment(s)
Re: Fix some error handling for read() and errno

On Mon, Jun 25, 2018 at 04:18:18PM +0900, Michael Paquier wrote:

As this one is done, I have been looking at that this thread again.
Peter Eisentraut has pushed as e5d11b9 something which does not need to
worry about pluralization of error messages. So I have moved to this
message style for all messages. All of this is done as 0001.

Mr. Robot has been complaining about this patch set, so attached is a
rebased version. Thinking about it, I would tend to just merge 0001 and
give up on 0002 as that may not justify future backpatch pain. Thoughts
are welcome.
--
Michael

Attachments:

0001-Rework-error-messages-around-file-handling.patchtext/x-diff; charset=us-asciiDownload
From c1bf79e30eae60ae3d57f28b8ba28361c5447468 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 25 Jun 2018 14:37:18 +0900
Subject: [PATCH 1/2] Rework error messages around file handling

Some error messages related to file handling are using the code path
context to define their state.  For example, 2PC-related errors are
referring to "two-phase status files", or "relation mapping file" is
used for catalog-to-filenode mapping, however those prove to be
difficult to translate, and are not more helpful than just referring to
the path of the file being manipulated.  So simplify all those error
messages by just referring to files with their path used.  In some
cases, like the manipulation of WAL segments, the context is helpful so
those are kept.

Calls to the system function read() have also been rather inconsistent
with their error handling sometimes not reporting the number of bytes
read, and some other code paths trying to use an errno which has not
been set.  The in-core functions are using a more consistent pattern
with this patch, which checks for both errno if set or if an
inconsistent read is happening.

So as to care about pluralization when reading an unexpected number of
bytes, "could not read: read %d of %d" is used as error message.

Author: Michael Paquier
Discussion: https://postgr.es/m/20180622234803.GA1134@paquier.xyz
---
 src/backend/access/transam/twophase.c       | 40 ++++++------
 src/backend/access/transam/xlog.c           | 40 ++++++++----
 src/backend/replication/logical/origin.c    | 13 +++-
 src/backend/replication/logical/snapbuild.c | 68 +++++++++++++++------
 src/backend/replication/slot.c              | 26 +++++---
 src/backend/replication/walsender.c         | 16 ++++-
 src/backend/utils/cache/relmapper.c         | 28 ++++++---
 src/bin/pg_basebackup/pg_receivewal.c       | 12 +++-
 src/bin/pg_rewind/file_ops.c                | 14 ++++-
 src/bin/pg_rewind/parsexlog.c               | 14 ++++-
 src/bin/pg_waldump/pg_waldump.c             | 17 ++++--
 11 files changed, 200 insertions(+), 88 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index e8d4e37fe3..0c99b33664 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1228,8 +1229,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		if (give_warnings)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not open two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not open file \"%s\": %m", path)));
 		return NULL;
 	}
 
@@ -1249,8 +1249,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 			errno = save_errno;
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not stat two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not stat file \"%s\": %m", path)));
 		}
 		return NULL;
 	}
@@ -1277,7 +1276,8 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
 		int			save_errno = errno;
 
@@ -1285,11 +1285,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		CloseTransientFile(fd);
 		if (give_warnings)
 		{
-			errno = save_errno;
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read file \"%s\": read %d of %zu",
+								path, r, stat.st_size)));
 		}
 		pfree(buf);
 		return NULL;
@@ -1632,8 +1638,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
 		if (errno != ENOENT || giveWarning)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not remove two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not remove file \"%s\": %m", path)));
 }
 
 /*
@@ -1661,8 +1666,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not recreate two-phase state file \"%s\": %m",
-						path)));
+				 errmsg("could not recreate file \"%s\": %m", path)));
 
 	/* Write content and CRC */
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
@@ -1677,7 +1681,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
 	{
@@ -1690,7 +1694,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
@@ -1707,14 +1711,14 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync two-phase state file: %m")));
+				 errmsg("could not fsync file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close two-phase state file: %m")));
+				 errmsg("could not close file \"%s\": %m", path)));
 }
 
 /*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4049deb968..1e8919df06 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3408,21 +3408,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int			r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("could not read file \"%s\": read %d of %d",
+									path, r, nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -4544,7 +4547,7 @@ ReadControlFile(void)
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg("could not open control file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						XLOG_CONTROL_FILE)));
 
 	pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
@@ -4554,10 +4557,12 @@ ReadControlFile(void)
 		if (r < 0)
 			ereport(PANIC,
 					(errcode_for_file_access(),
-					 errmsg("could not read from control file: %m")));
+					 errmsg("could not read file \"%s\": %m",
+							XLOG_CONTROL_FILE)));
 		else
 			ereport(PANIC,
-					(errmsg("could not read from control file: read %d bytes, expected %d", r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
 	}
 	pgstat_report_wait_end();
 
@@ -11689,6 +11694,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11782,18 +11788,26 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
 		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		errno = save_errno;
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d of %d",
+							fname, readOff, r, XLOG_BLCKSZ)));
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 3d3f6dff1b..841e24c03d 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -712,9 +712,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index e975faeb8c..61bc9e8f14 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1726,11 +1726,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1757,11 +1764,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1777,11 +1791,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1796,11 +1817,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index fb95b44ed8..e0c61c9199 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1414,11 +1414,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes,
+							(uint32) ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1454,10 +1458,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %u",
+							path, readBytes, cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 3a0106bc93..d4ac319258 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, nread, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,7 +2430,7 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -2433,6 +2438,13 @@ retry:
 							XLogFileNameP(curFileTimeLine, sendSegNo),
 							sendOff, (unsigned long) segbytes)));
 		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read from log segment %s, offset %u: read %d of %lu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, readbytes, (unsigned long) segbytes)));
+		}
 
 		/* Update state for read */
 		recptr += readbytes;
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..2d31f9f912 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -648,7 +649,7 @@ load_relmap_file(bool shared)
 	if (fd < 0)
 		ereport(FATAL,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	/*
@@ -659,11 +660,18 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
@@ -748,7 +756,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	if (write_wal)
@@ -782,7 +790,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 			errno = ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write to relation mapping file \"%s\": %m",
+				 errmsg("could not write file \"%s\": %m",
 						mapfilename)));
 	}
 	pgstat_report_wait_end();
@@ -797,14 +805,14 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (pg_fsync(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync relation mapping file \"%s\": %m",
+				 errmsg("could not fsync file \"%s\": %m",
 						mapfilename)));
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close relation mapping file \"%s\": %m",
+				 errmsg("could not close file \"%s\": %m",
 						mapfilename)));
 
 	/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index ed9d7f6378..8be8d48a8a 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,15 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": read %d of %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..da2bf449e4 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,16 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d of %d\n",
+					 fullpath, r, len);
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 1689279767..f698659b3a 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read from file \"%s\": read %d of %d\n"),
+				   xlogfpath, r, XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index d41b831b18..744b1f52a3 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("could not read file \"%s\": read %d of %d",
+							fname, r, XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read from log file %s, offset %u: read %d of %d",
+							fname, sendOff, readbytes, segbytes);
 		}
 
 		/* Update state for read */
-- 
2.18.0

0002-Add-interface-to-read-write-fsync-with-transient-fil.patchtext/x-diff; charset=us-asciiDownload
From cca233d62fd875ed0bdd76726742342ea9bc37c3 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 25 Jun 2018 16:15:03 +0900
Subject: [PATCH 2/2] Add interface to read/write/fsync with transient files

The following set of routines gets added for the manipulation of
transient files:
void WriteTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
void SyncTransientFile(int fd, int elevel, const char *filename,
    uint32 wait_event_info);

This simplifies code related to replication slots, 2PC files, relation
mapper files and snapshot builds:
- Centralize errno handling for transient files with ENOSPC for write(2)
and read count for read(2)
- Wait events have to be defined, so those would unlikely get forgotten
in the future.
- Error handling for CloseTransientFile in code paths is centralized.
---
 src/backend/access/transam/twophase.c       |  25 +----
 src/backend/replication/logical/snapbuild.c | 110 ++------------------
 src/backend/replication/slot.c              |  46 +-------
 src/backend/storage/file/fd.c               |  97 ++++++++++++++++-
 src/backend/utils/cache/relmapper.c         |  40 ++-----
 src/include/storage/fd.h                    |  10 +-
 6 files changed, 128 insertions(+), 200 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 0c99b33664..557261fc31 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,7 +1219,6 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
-	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1275,28 +1274,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	 */
 	buf = (char *) palloc(stat.st_size);
 
-	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	r = read(fd, buf, stat.st_size);
-	if (r != stat.st_size)
+	if (!ReadTransientFile(fd, buf, stat.st_size,
+						   give_warnings ? WARNING : DEBUG3, path,
+						   WAIT_EVENT_TWOPHASE_FILE_READ))
 	{
-		int			save_errno = errno;
-
-		pgstat_report_wait_end();
-		CloseTransientFile(fd);
-		if (give_warnings)
-		{
-			if (r < 0)
-			{
-				errno = save_errno;
-				ereport(WARNING,
-						(errcode_for_file_access(),
-						 errmsg("could not read file \"%s\": %m", path)));
-			}
-			else
-				ereport(WARNING,
-						(errmsg("could not read file \"%s\": read %d of %zu",
-								path, r, stat.st_size)));
-		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 61bc9e8f14..05b74a61a3 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1609,20 +1609,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
 		ereport(ERROR,
 				(errmsg("could not open file \"%s\": %m", path)));
 
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
-	if ((write(fd, ondisk, needed_length)) != needed_length)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		/* if write didn't set errno, assume problem is no disk space */
-		errno = save_errno ? save_errno : ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write to file \"%s\": %m", tmppath)));
-	}
-	pgstat_report_wait_end();
+	WriteTransientFile(fd, (char *) ondisk, needed_length, ERROR, tmppath,
+					   WAIT_EVENT_SNAPBUILD_WRITE);
 
 	/*
 	 * fsync the file before renaming so that even if we crash after this we
@@ -1686,7 +1674,6 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	int			fd;
 	char		path[MAXPGPATH];
 	Size		sz;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no point in loading a snapshot if we're already there */
@@ -1716,29 +1703,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	fsync_fname(path, false);
 	fsync_fname("pg_logical/snapshots", true);
 
-
 	/* read statically sized portion of snapshot */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != SnapBuildOnDiskConstantSize)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, SnapBuildOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk, SnapBuildOnDiskConstantSize,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
 		ereport(ERROR,
@@ -1756,80 +1723,23 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 				SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
 
 	/* read SnapBuild */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
-	pgstat_report_wait_end();
-	if (readBytes != sizeof(SnapBuild))
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sizeof(SnapBuild))));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk.builder, sizeof(SnapBuild),
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
 	/* restore running xacts (dead, but kept for backward compat) */
 	sz = sizeof(TransactionId) * ondisk.builder.was_running.was_xcnt_space;
 	ondisk.builder.was_running.was_xip =
 		MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.was_running.was_xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
 	/* restore committed xacts information */
 	sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
 	ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.committed.xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.committed.xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index e0c61c9199..818b13e973 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1354,7 +1354,6 @@ RestoreSlotFromDisk(const char *name)
 	char		path[MAXPGPATH + 22];
 	int			fd;
 	bool		restored = false;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no need to lock here, no concurrent access allowed yet */
@@ -1405,25 +1404,8 @@ RestoreSlotFromDisk(const char *name)
 	END_CRIT_SECTION();
 
 	/* read part of statefile that's guaranteed to be version independent */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != ReplicationSlotOnDiskConstantSize)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %u",
-							path, readBytes,
-							(uint32) ReplicationSlotOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &cp, ReplicationSlotOnDiskConstantSize,
+							 PANIC, path, WAIT_EVENT_REPLICATION_SLOT_READ);
 
 	/* verify magic */
 	if (cp.magic != SLOT_MAGIC)
@@ -1447,27 +1429,9 @@ RestoreSlotFromDisk(const char *name)
 						path, cp.length)));
 
 	/* Now that we know the size, read the entire file */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd,
-					 (char *) &cp + ReplicationSlotOnDiskConstantSize,
-					 cp.length);
-	pgstat_report_wait_end();
-	if (readBytes != cp.length)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %u",
-							path, readBytes, cp.length)));
-	}
-
+	(void) ReadTransientFile(fd, (char *) &cp + ReplicationSlotOnDiskConstantSize,
+							 cp.length, PANIC, path,
+							 WAIT_EVENT_REPLICATION_SLOT_READ);
 	CloseTransientFile(fd);
 
 	/* now verify the CRC */
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 8dd51f1767..fba7774ddc 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -47,8 +47,9 @@
  * ownership mechanism that provides automatic cleanup for shared files when
  * the last of a group of backends detaches.
  *
- * AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are
- * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively.
+ * AllocateFile, AllocateDir, OpenPipeStream, OpenTransientFile,
+ * WriteTransientFile and ReadTransientFile are wrappers around fopen(3),
+ * opendir(3), popen(3), open(2), write(2) and read(2) respectively.
  * They behave like the corresponding native functions, except that the handle
  * is registered with the current subtransaction, and will be automatically
  * closed at abort. These are intended mainly for short operations like
@@ -2480,6 +2481,98 @@ TryAgain:
 	return NULL;
 }
 
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to write(2).
+ */
+void
+WriteTransientFile(int fd, char *buf, Size count, int elevel,
+				   const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = write(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+
+		/* if write didn't set errno, assume problem is no disk space */
+		errno = save_errno ? save_errno : ENOSPC;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", filename)));
+	}
+}
+
+/*
+ * Read from a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to read(2).  Returns true on
+ * success and false on failure.
+ */
+bool
+ReadTransientFile(int fd, char *buf, Size count, int elevel,
+				  const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = read(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		CloseTransientFile(fd);
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", filename)));
+		}
+		else
+			ereport(elevel,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							filename, r, count)));
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to fsync(2).
+ */
+void
+SyncTransientFile(int fd, int elevel, const char *filename,
+				  uint32 wait_event_info)
+{
+	int			status;
+
+	pgstat_report_wait_start(wait_event_info);
+	status = pg_fsync(fd);
+	pgstat_report_wait_end();
+
+	if (status != 0)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+		errno = save_errno;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", filename)));
+	}
+}
+
 /*
  * Free an AllocateDesc of any type.
  *
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 2d31f9f912..e6eff58d40 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,7 +629,6 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
-	int			r;
 
 	if (shared)
 	{
@@ -659,20 +658,8 @@ load_relmap_file(bool shared)
 	 * look, the sinval signaling mechanism will make us re-read it before we
 	 * are able to access any relation that's affected by the change.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	r = read(fd, map, sizeof(RelMapFile));
-	if (r != sizeof(RelMapFile))
-	{
-		if (r < 0)
-			ereport(FATAL,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", mapfilename)));
-		else
-			ereport(FATAL,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							mapfilename, r, sizeof(RelMapFile))));
-	}
-	pgstat_report_wait_end();
+	(void) ReadTransientFile(fd, (char *) map, sizeof(RelMapFile), FATAL,
+							 mapfilename, WAIT_EVENT_RELATION_MAP_READ);
 
 	CloseTransientFile(fd);
 
@@ -782,18 +769,9 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	}
 
 	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
-	if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
-	{
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write file \"%s\": %m",
-						mapfilename)));
-	}
-	pgstat_report_wait_end();
+
+	WriteTransientFile(fd, (char *) newmap, sizeof(RelMapFile), ERROR,
+					   mapfilename, WAIT_EVENT_RELATION_MAP_WRITE);
 
 	/*
 	 * We choose to fsync the data to disk before considering the task done.
@@ -801,13 +779,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	 * issue, but it would complicate checkpointing --- see notes for
 	 * CheckPointRelationMap.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
-	if (pg_fsync(fd) != 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not fsync file \"%s\": %m",
-						mapfilename)));
-	pgstat_report_wait_end();
+	SyncTransientFile(fd, ERROR, mapfilename, WAIT_EVENT_RELATION_MAP_SYNC);
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 8e7c9728f4..4309d9da95 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -34,7 +34,9 @@
  *
  * Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
  * open directories (DIR*), and OpenTransientFile/CloseTransient File for an
- * unbuffered file descriptor.
+ * unbuffered file descriptor.  WriteTransientFile should be used instead
+ * of write(2), ReadTransientFile instead of read(2), and SyncTransientFile
+ * instead of fsync(2).
  */
 #ifndef FD_H
 #define FD_H
@@ -105,6 +107,12 @@ extern int	FreeDir(DIR *dir);
 /* Operations to allow use of a plain kernel FD, with automatic cleanup */
 extern int	OpenTransientFile(const char *fileName, int fileFlags);
 extern int	OpenTransientFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
+extern void WriteTransientFile(int fd, char *buf, Size count, int elevel,
+							   const char *filename, uint32 wait_event_info);
+extern bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
+							  const char *filename, uint32 wait_event_info);
+extern void SyncTransientFile(int fd, int elevel,  const char *filename,
+							  uint32 wait_event_info);
 extern int	CloseTransientFile(int fd);
 
 /* If you've really really gotta have a plain kernel FD, use this */
-- 
2.18.0

#20Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Michael Paquier (#19)
Re: Fix some error handling for read() and errno

On 2018-Jul-13, Michael Paquier wrote:

Mr. Robot has been complaining about this patch set, so attached is a
rebased version. Thinking about it, I would tend to just merge 0001 and
give up on 0002 as that may not justify future backpatch pain. Thoughts
are welcome.

I vote to push both.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#21Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#20)
Re: Fix some error handling for read() and errno

On Fri, Jul 13, 2018 at 11:31:31AM -0400, Alvaro Herrera wrote:

Mr. Robot has been complaining about this patch set, so attached is a
rebased version. Thinking about it, I would tend to just merge 0001 and
give up on 0002 as that may not justify future backpatch pain. Thoughts
are welcome.

I vote to push both.

Thanks! Did you look at the code? The first patch is just some
cleanup, while the second could have adjustments? For the second I went
with the minimal amount of work, and actually there is no need to make
ReadTransientFile() return a status per my study of ReadTwoPhaseFile()
in /messages/by-id/20180709050309.GM1467@paquier.xyz which must fail
when reading the file. So patch 0002 depends on the other 2PC patch.
--
Michael

#22Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Michael Paquier (#21)
Re: Fix some error handling for read() and errno

On 2018-Jul-14, Michael Paquier wrote:

On Fri, Jul 13, 2018 at 11:31:31AM -0400, Alvaro Herrera wrote:

Mr. Robot has been complaining about this patch set, so attached is a
rebased version. Thinking about it, I would tend to just merge 0001 and
give up on 0002 as that may not justify future backpatch pain. Thoughts
are welcome.

I vote to push both.

Thanks! Did you look at the code? The first patch is just some
cleanup, while the second could have adjustments? For the second I went
with the minimal amount of work, and actually there is no need to make
ReadTransientFile() return a status per my study of ReadTwoPhaseFile()
in /messages/by-id/20180709050309.GM1467@paquier.xyz which must fail
when reading the file. So patch 0002 depends on the other 2PC patch.

I did read them, though not in minute detail. 0002 seems to result in
code easier to read. If there are particular places that deviate from
the obvious patterns, I didn't notice them.

In 0001 one thing I wasn't terribly in love with was random deviations
in sprintf format specifiers for things that should be identical, ie.
%lu in some places and %zu in others, for "read only %d of %d". It
seems you should pick the more general one (%zu) and use casts to Size
(or is it size_t?) in the places that have other types. That way you
*really* decrease translator effort to the bare minimum :-)

Ah, in 0001 you have one case of "could not read _from_" (in
SimpleXLogPageRead). The "from" is not present in the other places.
Looks odd.

I'm not sure about putting the wait event stuff inside the new
functions. It looks odd, though I understand why you did it.

No opinion on the 2PC stuff -- didn't look at that.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#23Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#22)
2 attachment(s)
Re: Fix some error handling for read() and errno

On Sat, Jul 14, 2018 at 12:50:02AM -0400, Alvaro Herrera wrote:

I did read them, though not in minute detail. 0002 seems to result in
code easier to read. If there are particular places that deviate from
the obvious patterns, I didn't notice them.

In 0001 one thing I wasn't terribly in love with was random deviations
in sprintf format specifiers for things that should be identical, ie.
%lu in some places and %zu in others, for "read only %d of %d". It
seems you should pick the more general one (%zu) and use casts to Size
(or is it size_t?) in the places that have other types. That way you
*really* decrease translator effort to the bare minimum :-)

Yeah, that would be really the point, hence let's use "could not read
file \"%s\": read %d of %zu" everywhere with a cast to Size. That's
used in some other places as well.

Ah, in 0001 you have one case of "could not read _from_" (in
SimpleXLogPageRead). The "from" is not present in the other places.
Looks odd.

Right.

I'm not sure about putting the wait event stuff inside the new
functions. It looks odd, though I understand why you did it.

I don't really find that strange as any transient files should be
tracked by I/O wait events :)

No opinion on the 2PC stuff -- didn't look at that.

For now, I think that just moving forward with 0001, and then revisit
0002 once the other 2PC patch is settled makes the most sense. On the
other thread, the current 2PC behavior can create silent data loss so
I would like to back-patch it, so that would be less work.
--
Michael

Attachments:

0001-Rework-error-messages-around-file-handling.patchtext/x-diff; charset=us-asciiDownload
From 434481d861575ffab6294d871089897762a6b911 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Sat, 14 Jul 2018 15:30:55 +0900
Subject: [PATCH 1/2] Rework error messages around file handling

Some error messages related to file handling are using the code path
context to define their state.  For example, 2PC-related errors are
referring to "two-phase status files", or "relation mapping file" is
used for catalog-to-filenode mapping, however those prove to be
difficult to translate, and are not more helpful than just referring to
the path of the file being manipulated.  So simplify all those error
messages by just referring to files with their path used.  In some
cases, like the manipulation of WAL segments, the context is helpful so
those are kept.

Calls to the system function read() have also been rather inconsistent
with their error handling sometimes not reporting the number of bytes
read, and some other code paths trying to use an errno which has not
been set.  The in-core functions are using a more consistent pattern
with this patch, which checks for both errno if set or if an
inconsistent read is happening.

So as to care about pluralization when reading an unexpected number of
bytes, "could not read: read %d of %zu" is used as error message.

Author: Michael Paquier
Discussion: https://postgr.es/m/20180622234803.GA1134@paquier.xyz
---
 src/backend/access/transam/twophase.c       | 40 ++++++------
 src/backend/access/transam/xlog.c           | 40 ++++++++----
 src/backend/replication/logical/origin.c    | 13 +++-
 src/backend/replication/logical/snapbuild.c | 68 +++++++++++++++------
 src/backend/replication/slot.c              | 26 +++++---
 src/backend/replication/walsender.c         | 20 ++++--
 src/backend/utils/cache/relmapper.c         | 28 ++++++---
 src/bin/pg_basebackup/pg_receivewal.c       | 12 +++-
 src/bin/pg_rewind/file_ops.c                | 14 ++++-
 src/bin/pg_rewind/parsexlog.c               | 14 ++++-
 src/bin/pg_waldump/pg_waldump.c             | 17 ++++--
 src/common/controldata_utils.c              |  8 +--
 12 files changed, 206 insertions(+), 94 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index e8d4e37fe3..0c99b33664 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,6 +1219,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
+	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1228,8 +1229,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		if (give_warnings)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not open two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not open file \"%s\": %m", path)));
 		return NULL;
 	}
 
@@ -1249,8 +1249,7 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 			errno = save_errno;
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not stat two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not stat file \"%s\": %m", path)));
 		}
 		return NULL;
 	}
@@ -1277,7 +1276,8 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	buf = (char *) palloc(stat.st_size);
 
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	if (read(fd, buf, stat.st_size) != stat.st_size)
+	r = read(fd, buf, stat.st_size);
+	if (r != stat.st_size)
 	{
 		int			save_errno = errno;
 
@@ -1285,11 +1285,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 		CloseTransientFile(fd);
 		if (give_warnings)
 		{
-			errno = save_errno;
-			ereport(WARNING,
-					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state file \"%s\": %m",
-							path)));
+			if (r < 0)
+			{
+				errno = save_errno;
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", path)));
+			}
+			else
+				ereport(WARNING,
+						(errmsg("could not read file \"%s\": read %d of %zu",
+								path, r, stat.st_size)));
 		}
 		pfree(buf);
 		return NULL;
@@ -1632,8 +1638,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
 		if (errno != ENOENT || giveWarning)
 			ereport(WARNING,
 					(errcode_for_file_access(),
-					 errmsg("could not remove two-phase state file \"%s\": %m",
-							path)));
+					 errmsg("could not remove file \"%s\": %m", path)));
 }
 
 /*
@@ -1661,8 +1666,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not recreate two-phase state file \"%s\": %m",
-						path)));
+				 errmsg("could not recreate file \"%s\": %m", path)));
 
 	/* Write content and CRC */
 	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
@@ -1677,7 +1681,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
 	{
@@ -1690,7 +1694,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno ? save_errno : ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write two-phase state file: %m")));
+				 errmsg("could not write file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
@@ -1707,14 +1711,14 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 		errno = save_errno;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync two-phase state file: %m")));
+				 errmsg("could not fsync file \"%s\": %m", path)));
 	}
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close two-phase state file: %m")));
+				 errmsg("could not close file \"%s\": %m", path)));
 }
 
 /*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4049deb968..bebe6405de 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3408,21 +3408,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 
 		if (nread > 0)
 		{
+			int			r;
+
 			if (nread > sizeof(buffer))
 				nread = sizeof(buffer);
 			errno = 0;
 			pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-			if (read(srcfd, buffer, nread) != nread)
+			r = read(srcfd, buffer, nread);
+			if (r != nread)
 			{
-				if (errno != 0)
+				if (r < 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not read file \"%s\": %m",
 									path)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"",
-									path)));
+							(errmsg("could not read file \"%s\": read %d of %zu",
+									path, r, (Size) nread)));
 			}
 			pgstat_report_wait_end();
 		}
@@ -4544,7 +4547,7 @@ ReadControlFile(void)
 	if (fd < 0)
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg("could not open control file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						XLOG_CONTROL_FILE)));
 
 	pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
@@ -4554,10 +4557,12 @@ ReadControlFile(void)
 		if (r < 0)
 			ereport(PANIC,
 					(errcode_for_file_access(),
-					 errmsg("could not read from control file: %m")));
+					 errmsg("could not read file \"%s\": %m",
+							XLOG_CONTROL_FILE)));
 		else
 			ereport(PANIC,
-					(errmsg("could not read from control file: read %d bytes, expected %d", r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
 	}
 	pgstat_report_wait_end();
 
@@ -11689,6 +11694,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	int			emode = private->emode;
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -11782,18 +11788,26 @@ retry:
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	r = read(readFile, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
 		int			save_errno = errno;
 
 		pgstat_report_wait_end();
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-		errno = save_errno;
-		ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-				(errcode_for_file_access(),
-				 errmsg("could not read from log segment %s, offset %u: %m",
-						fname, readOff)));
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errcode_for_file_access(),
+					 errmsg("could not read from log segment %s, offset %u: %m",
+							fname, readOff)));
+		}
+		else
+			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+					(errmsg("could not read from log segment %s, offset %u: read %d of %zu",
+							fname, readOff, r, (Size) XLOG_BLCKSZ)));
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 3d3f6dff1b..841e24c03d 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -712,9 +712,16 @@ StartupReplicationOrigin(void)
 	/* verify magic, that is written even if nothing was active */
 	readBytes = read(fd, &magic, sizeof(magic));
 	if (readBytes != sizeof(magic))
-		ereport(PANIC,
-				(errmsg("could not read file \"%s\": %m",
-						path)));
+	{
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": %m",
+							path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(magic))));
+	}
 	COMP_CRC32C(crc, &magic, sizeof(magic));
 
 	if (magic != REPLICATION_STATE_MAGIC)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index e975faeb8c..61bc9e8f14 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1726,11 +1726,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, SnapBuildOnDiskConstantSize)));
 	}
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
@@ -1757,11 +1764,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sizeof(SnapBuild))));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sizeof(SnapBuild))));
 	}
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
@@ -1777,11 +1791,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
@@ -1796,11 +1817,18 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 		int			save_errno = errno;
 
 		CloseTransientFile(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %d: %m",
-						path, readBytes, (int) sz)));
+
+		if (readBytes < 0)
+		{
+			errno = save_errno;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		}
+		else
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, sz)));
 	}
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index fb95b44ed8..afbaf8c80d 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1414,11 +1414,15 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes,
-						(uint32) ReplicationSlotOnDiskConstantSize)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes,
+							ReplicationSlotOnDiskConstantSize)));
 	}
 
 	/* verify magic */
@@ -1454,10 +1458,14 @@ RestoreSlotFromDisk(const char *name)
 
 		CloseTransientFile(fd);
 		errno = saved_errno;
-		ereport(PANIC,
-				(errcode_for_file_access(),
-				 errmsg("could not read file \"%s\", read %d of %u: %m",
-						path, readBytes, cp.length)));
+		if (readBytes < 0)
+			ereport(PANIC,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", path)));
+		else
+			ereport(PANIC,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, readBytes, (Size) cp.length)));
 	}
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 3a0106bc93..e3df5495c3 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -501,11 +501,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 		pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
 		nread = read(fd, rbuf, sizeof(rbuf));
 		pgstat_report_wait_end();
-		if (nread <= 0)
+		if (nread < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m",
 							path)));
+		else if (nread == 0)
+			ereport(ERROR,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							path, nread, bytesleft)));
+
 		pq_sendbytes(&buf, rbuf, nread);
 		bytesleft -= nread;
 	}
@@ -2425,13 +2430,20 @@ retry:
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 		readbytes = read(sendFile, p, segbytes);
 		pgstat_report_wait_end();
-		if (readbytes <= 0)
+		if (readbytes < 0)
 		{
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
+					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
 							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, (unsigned long) segbytes)));
+							sendOff, (Size) segbytes)));
+		}
+		else if (readbytes == 0)
+		{
+			ereport(ERROR,
+					(errmsg("could not read from log segment %s, offset %u: read %d of %zu",
+							XLogFileNameP(curFileTimeLine, sendSegNo),
+							sendOff, readbytes, (Size) segbytes)));
 		}
 
 		/* Update state for read */
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 99d095f2df..2d31f9f912 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,6 +629,7 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
+	int			r;
 
 	if (shared)
 	{
@@ -648,7 +649,7 @@ load_relmap_file(bool shared)
 	if (fd < 0)
 		ereport(FATAL,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	/*
@@ -659,11 +660,18 @@ load_relmap_file(bool shared)
 	 * are able to access any relation that's affected by the change.
 	 */
 	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
-		ereport(FATAL,
-				(errcode_for_file_access(),
-				 errmsg("could not read relation mapping file \"%s\": %m",
-						mapfilename)));
+	r = read(fd, map, sizeof(RelMapFile));
+	if (r != sizeof(RelMapFile))
+	{
+		if (r < 0)
+			ereport(FATAL,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", mapfilename)));
+		else
+			ereport(FATAL,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							mapfilename, r, sizeof(RelMapFile))));
+	}
 	pgstat_report_wait_end();
 
 	CloseTransientFile(fd);
@@ -748,7 +756,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (fd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not open relation mapping file \"%s\": %m",
+				 errmsg("could not open file \"%s\": %m",
 						mapfilename)));
 
 	if (write_wal)
@@ -782,7 +790,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 			errno = ENOSPC;
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not write to relation mapping file \"%s\": %m",
+				 errmsg("could not write file \"%s\": %m",
 						mapfilename)));
 	}
 	pgstat_report_wait_end();
@@ -797,14 +805,14 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	if (pg_fsync(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not fsync relation mapping file \"%s\": %m",
+				 errmsg("could not fsync file \"%s\": %m",
 						mapfilename)));
 	pgstat_report_wait_end();
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not close relation mapping file \"%s\": %m",
+				 errmsg("could not close file \"%s\": %m",
 						mapfilename)));
 
 	/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index ed9d7f6378..8be8d48a8a 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -284,6 +284,7 @@ FindStreamingStart(uint32 *tli)
 			char		buf[4];
 			int			bytes_out;
 			char		fullpath[MAXPGPATH * 2];
+			int			r;
 
 			snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
 
@@ -300,10 +301,15 @@ FindStreamingStart(uint32 *tli)
 						progname, fullpath, strerror(errno));
 				disconnect_and_exit(1);
 			}
-			if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+			r = read(fd, (char *) buf, sizeof(buf));
+			if (r != sizeof(buf))
 			{
-				fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
-						progname, fullpath, strerror(errno));
+				if (r < 0)
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+							progname, fullpath, strerror(errno));
+				else
+					fprintf(stderr, _("%s: could not read compressed file \"%s\": read %d of %zu\n"),
+							progname, fullpath, r, sizeof(buf));
 				disconnect_and_exit(1);
 			}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 94bcc13ae8..0bd110f9b0 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -289,6 +289,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	struct stat statbuf;
 	char		fullpath[MAXPGPATH];
 	int			len;
+	int			r;
 
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
@@ -304,9 +305,16 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 
 	buffer = pg_malloc(len + 1);
 
-	if (read(fd, buffer, len) != len)
-		pg_fatal("could not read file \"%s\": %s\n",
-				 fullpath, strerror(errno));
+	r = read(fd, buffer, len);
+	if (r != len)
+	{
+		if (r < 0)
+			pg_fatal("could not read file \"%s\": %s\n",
+					 fullpath, strerror(errno));
+		else
+			pg_fatal("could not read file \"%s\": read %d of %zu\n",
+					 fullpath, r, (Size) len);
+	}
 	close(fd);
 
 	/* Zero-terminate the buffer. */
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 1689279767..40028471bf 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -246,6 +246,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 	uint32		targetPageOff;
 	XLogRecPtr	targetSegEnd;
 	XLogSegNo	targetSegNo;
+	int			r;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
 	XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd);
@@ -309,10 +310,17 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 		return -1;
 	}
 
-	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+
+	r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
+	if (r != XLOG_BLCKSZ)
 	{
-		printf(_("could not read from file \"%s\": %s\n"), xlogfpath,
-			   strerror(errno));
+		if (r < 0)
+			printf(_("could not read file \"%s\": %s\n"), xlogfpath,
+				   strerror(errno));
+		else
+			printf(_("could not read file \"%s\": read %d of %zu\n"),
+				   xlogfpath, r, (Size) XLOG_BLCKSZ);
+
 		return -1;
 	}
 
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index d41b831b18..2a557b37e5 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -210,8 +210,10 @@ search_directory(const char *directory, const char *fname)
 	if (fd >= 0)
 	{
 		char		buf[XLOG_BLCKSZ];
+		int			r;
 
-		if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+		r = read(fd, buf, XLOG_BLCKSZ);
+		if (r == XLOG_BLCKSZ)
 		{
 			XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
 
@@ -229,7 +231,8 @@ search_directory(const char *directory, const char *fname)
 				fatal_error("could not read file \"%s\": %s",
 							fname, strerror(errno));
 			else
-				fatal_error("not enough data in file \"%s\"", fname);
+				fatal_error("could not read file \"%s\": read %d of %zu",
+							fname, r, (Size) XLOG_BLCKSZ);
 		}
 		close(fd);
 		return true;
@@ -411,11 +414,17 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		{
 			int			err = errno;
 			char		fname[MAXPGPATH];
+			int			save_errno = errno;
 
 			XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
+			errno = save_errno;
 
-			fatal_error("could not read from log file %s, offset %u, length %d: %s",
-						fname, sendOff, segbytes, strerror(err));
+			if (readbytes < 0)
+				fatal_error("could not read from log file %s, offset %u, length %d: %s",
+							fname, sendOff, segbytes, strerror(err));
+			else if (readbytes == 0)
+				fatal_error("could not read from log file %s, offset %u: read %d of %zu",
+							fname, sendOff, readbytes, (Size) segbytes);
 		}
 
 		/* Update state for read */
diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index e83d60d438..60197b2440 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -83,12 +83,12 @@ get_controlfile(const char *DataDir, const char *progname, bool *crc_ok_p)
 		else
 #ifndef FRONTEND
 			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %d",
-							ControlFilePath, r, (int) sizeof(ControlFileData))));
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							ControlFilePath, r, sizeof(ControlFileData))));
 #else
 		{
-			fprintf(stderr, _("%s: could not read file \"%s\": read %d of %d\n"),
-					progname, ControlFilePath, r, (int) sizeof(ControlFileData));
+			fprintf(stderr, _("%s: could not read file \"%s\": read %d of %zu\n"),
+					progname, ControlFilePath, r, sizeof(ControlFileData));
 			exit(EXIT_FAILURE);
 		}
 #endif
-- 
2.18.0

0002-Add-interface-to-read-write-fsync-with-transient-fil.patchtext/x-diff; charset=us-asciiDownload
From bfabee80635269c0950864d6cd2cf59d613f2d21 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 25 Jun 2018 16:15:03 +0900
Subject: [PATCH 2/2] Add interface to read/write/fsync with transient files

The following set of routines gets added for the manipulation of
transient files:
void WriteTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
    const char *filename, uint32 wait_event_info);
void SyncTransientFile(int fd, int elevel, const char *filename,
    uint32 wait_event_info);

This simplifies code related to replication slots, 2PC files, relation
mapper files and snapshot builds:
- Centralize errno handling for transient files with ENOSPC for write(2)
and read count for read(2)
- Wait events have to be defined, so those would unlikely get forgotten
in the future.
- Error handling for CloseTransientFile in code paths is centralized.
---
 src/backend/access/transam/twophase.c       |  25 +----
 src/backend/replication/logical/snapbuild.c | 110 ++------------------
 src/backend/replication/slot.c              |  46 +-------
 src/backend/storage/file/fd.c               |  97 ++++++++++++++++-
 src/backend/utils/cache/relmapper.c         |  40 ++-----
 src/include/storage/fd.h                    |  10 +-
 6 files changed, 128 insertions(+), 200 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 0c99b33664..557261fc31 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1219,7 +1219,6 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	uint32		crc_offset;
 	pg_crc32c	calc_crc,
 				file_crc;
-	int			r;
 
 	TwoPhaseFilePath(path, xid);
 
@@ -1275,28 +1274,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 	 */
 	buf = (char *) palloc(stat.st_size);
 
-	pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
-	r = read(fd, buf, stat.st_size);
-	if (r != stat.st_size)
+	if (!ReadTransientFile(fd, buf, stat.st_size,
+						   give_warnings ? WARNING : DEBUG3, path,
+						   WAIT_EVENT_TWOPHASE_FILE_READ))
 	{
-		int			save_errno = errno;
-
-		pgstat_report_wait_end();
-		CloseTransientFile(fd);
-		if (give_warnings)
-		{
-			if (r < 0)
-			{
-				errno = save_errno;
-				ereport(WARNING,
-						(errcode_for_file_access(),
-						 errmsg("could not read file \"%s\": %m", path)));
-			}
-			else
-				ereport(WARNING,
-						(errmsg("could not read file \"%s\": read %d of %zu",
-								path, r, stat.st_size)));
-		}
 		pfree(buf);
 		return NULL;
 	}
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 61bc9e8f14..05b74a61a3 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1609,20 +1609,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
 		ereport(ERROR,
 				(errmsg("could not open file \"%s\": %m", path)));
 
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
-	if ((write(fd, ondisk, needed_length)) != needed_length)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		/* if write didn't set errno, assume problem is no disk space */
-		errno = save_errno ? save_errno : ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write to file \"%s\": %m", tmppath)));
-	}
-	pgstat_report_wait_end();
+	WriteTransientFile(fd, (char *) ondisk, needed_length, ERROR, tmppath,
+					   WAIT_EVENT_SNAPBUILD_WRITE);
 
 	/*
 	 * fsync the file before renaming so that even if we crash after this we
@@ -1686,7 +1674,6 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	int			fd;
 	char		path[MAXPGPATH];
 	Size		sz;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no point in loading a snapshot if we're already there */
@@ -1716,29 +1703,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	fsync_fname(path, false);
 	fsync_fname("pg_logical/snapshots", true);
 
-
 	/* read statically sized portion of snapshot */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != SnapBuildOnDiskConstantSize)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, SnapBuildOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk, SnapBuildOnDiskConstantSize,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 
 	if (ondisk.magic != SNAPBUILD_MAGIC)
 		ereport(ERROR,
@@ -1756,80 +1723,23 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 				SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
 
 	/* read SnapBuild */
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
-	pgstat_report_wait_end();
-	if (readBytes != sizeof(SnapBuild))
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sizeof(SnapBuild))));
-	}
+	(void) ReadTransientFile(fd, (char *) &ondisk.builder, sizeof(SnapBuild),
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, &ondisk.builder, sizeof(SnapBuild));
 
 	/* restore running xacts (dead, but kept for backward compat) */
 	sz = sizeof(TransactionId) * ondisk.builder.was_running.was_xcnt_space;
 	ondisk.builder.was_running.was_xip =
 		MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.was_running.was_xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.was_running.was_xip, sz);
 
 	/* restore committed xacts information */
 	sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
 	ondisk.builder.committed.xip = MemoryContextAllocZero(builder->context, sz);
-	pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_READ);
-	readBytes = read(fd, ondisk.builder.committed.xip, sz);
-	pgstat_report_wait_end();
-	if (readBytes != sz)
-	{
-		int			save_errno = errno;
-
-		CloseTransientFile(fd);
-
-		if (readBytes < 0)
-		{
-			errno = save_errno;
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		}
-		else
-			ereport(ERROR,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, sz)));
-	}
+	(void) ReadTransientFile(fd, (char *) ondisk.builder.committed.xip, sz,
+					  ERROR, path, WAIT_EVENT_SNAPBUILD_READ);
 	COMP_CRC32C(checksum, ondisk.builder.committed.xip, sz);
 
 	CloseTransientFile(fd);
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index afbaf8c80d..818b13e973 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1354,7 +1354,6 @@ RestoreSlotFromDisk(const char *name)
 	char		path[MAXPGPATH + 22];
 	int			fd;
 	bool		restored = false;
-	int			readBytes;
 	pg_crc32c	checksum;
 
 	/* no need to lock here, no concurrent access allowed yet */
@@ -1405,25 +1404,8 @@ RestoreSlotFromDisk(const char *name)
 	END_CRIT_SECTION();
 
 	/* read part of statefile that's guaranteed to be version independent */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd, &cp, ReplicationSlotOnDiskConstantSize);
-	pgstat_report_wait_end();
-	if (readBytes != ReplicationSlotOnDiskConstantSize)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes,
-							ReplicationSlotOnDiskConstantSize)));
-	}
+	(void) ReadTransientFile(fd, (char *) &cp, ReplicationSlotOnDiskConstantSize,
+							 PANIC, path, WAIT_EVENT_REPLICATION_SLOT_READ);
 
 	/* verify magic */
 	if (cp.magic != SLOT_MAGIC)
@@ -1447,27 +1429,9 @@ RestoreSlotFromDisk(const char *name)
 						path, cp.length)));
 
 	/* Now that we know the size, read the entire file */
-	pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_READ);
-	readBytes = read(fd,
-					 (char *) &cp + ReplicationSlotOnDiskConstantSize,
-					 cp.length);
-	pgstat_report_wait_end();
-	if (readBytes != cp.length)
-	{
-		int			saved_errno = errno;
-
-		CloseTransientFile(fd);
-		errno = saved_errno;
-		if (readBytes < 0)
-			ereport(PANIC,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", path)));
-		else
-			ereport(PANIC,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							path, readBytes, (Size) cp.length)));
-	}
-
+	(void) ReadTransientFile(fd, (char *) &cp + ReplicationSlotOnDiskConstantSize,
+							 cp.length, PANIC, path,
+							 WAIT_EVENT_REPLICATION_SLOT_READ);
 	CloseTransientFile(fd);
 
 	/* now verify the CRC */
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 8dd51f1767..fba7774ddc 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -47,8 +47,9 @@
  * ownership mechanism that provides automatic cleanup for shared files when
  * the last of a group of backends detaches.
  *
- * AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are
- * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively.
+ * AllocateFile, AllocateDir, OpenPipeStream, OpenTransientFile,
+ * WriteTransientFile and ReadTransientFile are wrappers around fopen(3),
+ * opendir(3), popen(3), open(2), write(2) and read(2) respectively.
  * They behave like the corresponding native functions, except that the handle
  * is registered with the current subtransaction, and will be automatically
  * closed at abort. These are intended mainly for short operations like
@@ -2480,6 +2481,98 @@ TryAgain:
 	return NULL;
 }
 
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to write(2).
+ */
+void
+WriteTransientFile(int fd, char *buf, Size count, int elevel,
+				   const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = write(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+
+		/* if write didn't set errno, assume problem is no disk space */
+		errno = save_errno ? save_errno : ENOSPC;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", filename)));
+	}
+}
+
+/*
+ * Read from a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to read(2).  Returns true on
+ * success and false on failure.
+ */
+bool
+ReadTransientFile(int fd, char *buf, Size count, int elevel,
+				  const char *filename, uint32 wait_event_info)
+{
+	int			r;
+
+	pgstat_report_wait_start(wait_event_info);
+	r = read(fd, buf, count);
+	pgstat_report_wait_end();
+
+	if (r != count)
+	{
+		int         save_errno = errno;
+
+		CloseTransientFile(fd);
+
+		if (r < 0)
+		{
+			errno = save_errno;
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not read file \"%s\": %m", filename)));
+		}
+		else
+			ereport(elevel,
+					(errmsg("could not read file \"%s\": read %d of %zu",
+							filename, r, count)));
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Write to a file which has been opened using OpenTransientFile or
+ * OpenTransientFilePerm.  Equivalent to fsync(2).
+ */
+void
+SyncTransientFile(int fd, int elevel, const char *filename,
+				  uint32 wait_event_info)
+{
+	int			status;
+
+	pgstat_report_wait_start(wait_event_info);
+	status = pg_fsync(fd);
+	pgstat_report_wait_end();
+
+	if (status != 0)
+	{
+		int         save_errno = errno;
+
+		(void) CloseTransientFile(fd);
+		errno = save_errno;
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", filename)));
+	}
+}
+
 /*
  * Free an AllocateDesc of any type.
  *
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 2d31f9f912..e6eff58d40 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -629,7 +629,6 @@ load_relmap_file(bool shared)
 	char		mapfilename[MAXPGPATH];
 	pg_crc32c	crc;
 	int			fd;
-	int			r;
 
 	if (shared)
 	{
@@ -659,20 +658,8 @@ load_relmap_file(bool shared)
 	 * look, the sinval signaling mechanism will make us re-read it before we
 	 * are able to access any relation that's affected by the change.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
-	r = read(fd, map, sizeof(RelMapFile));
-	if (r != sizeof(RelMapFile))
-	{
-		if (r < 0)
-			ereport(FATAL,
-					(errcode_for_file_access(),
-					 errmsg("could not read file \"%s\": %m", mapfilename)));
-		else
-			ereport(FATAL,
-					(errmsg("could not read file \"%s\": read %d of %zu",
-							mapfilename, r, sizeof(RelMapFile))));
-	}
-	pgstat_report_wait_end();
+	(void) ReadTransientFile(fd, (char *) map, sizeof(RelMapFile), FATAL,
+							 mapfilename, WAIT_EVENT_RELATION_MAP_READ);
 
 	CloseTransientFile(fd);
 
@@ -782,18 +769,9 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	}
 
 	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
-	if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
-	{
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write file \"%s\": %m",
-						mapfilename)));
-	}
-	pgstat_report_wait_end();
+
+	WriteTransientFile(fd, (char *) newmap, sizeof(RelMapFile), ERROR,
+					   mapfilename, WAIT_EVENT_RELATION_MAP_WRITE);
 
 	/*
 	 * We choose to fsync the data to disk before considering the task done.
@@ -801,13 +779,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
 	 * issue, but it would complicate checkpointing --- see notes for
 	 * CheckPointRelationMap.
 	 */
-	pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
-	if (pg_fsync(fd) != 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not fsync file \"%s\": %m",
-						mapfilename)));
-	pgstat_report_wait_end();
+	SyncTransientFile(fd, ERROR, mapfilename, WAIT_EVENT_RELATION_MAP_SYNC);
 
 	if (CloseTransientFile(fd))
 		ereport(ERROR,
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 8e7c9728f4..4309d9da95 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -34,7 +34,9 @@
  *
  * Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
  * open directories (DIR*), and OpenTransientFile/CloseTransient File for an
- * unbuffered file descriptor.
+ * unbuffered file descriptor.  WriteTransientFile should be used instead
+ * of write(2), ReadTransientFile instead of read(2), and SyncTransientFile
+ * instead of fsync(2).
  */
 #ifndef FD_H
 #define FD_H
@@ -105,6 +107,12 @@ extern int	FreeDir(DIR *dir);
 /* Operations to allow use of a plain kernel FD, with automatic cleanup */
 extern int	OpenTransientFile(const char *fileName, int fileFlags);
 extern int	OpenTransientFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
+extern void WriteTransientFile(int fd, char *buf, Size count, int elevel,
+							   const char *filename, uint32 wait_event_info);
+extern bool ReadTransientFile(int fd, char *buf, Size count, int elevel,
+							  const char *filename, uint32 wait_event_info);
+extern void SyncTransientFile(int fd, int elevel,  const char *filename,
+							  uint32 wait_event_info);
 extern int	CloseTransientFile(int fd);
 
 /* If you've really really gotta have a plain kernel FD, use this */
-- 
2.18.0

#24Michael Paquier
michael@paquier.xyz
In reply to: Michael Paquier (#23)
Re: Fix some error handling for read() and errno

On Sat, Jul 14, 2018 at 03:37:56PM +0900, Michael Paquier wrote:

For now, I think that just moving forward with 0001, and then revisit
0002 once the other 2PC patch is settled makes the most sense. On the
other thread, the current 2PC behavior can create silent data loss so
I would like to back-patch it, so that would be less work.

Are there any objections with this plan? If none, then I would like to
move on with 0001 as there is clearly a consensus to simplify the work
of translators and to clean up the error code paths for read() calls.
Let's sort of the rest after the 2PC code paths are addressed.
--
Michael

#25Alvaro Herrera
alvherre@2ndquadrant.com
In reply to: Michael Paquier (#24)
Re: Fix some error handling for read() and errno

On 2018-Jul-16, Michael Paquier wrote:

On Sat, Jul 14, 2018 at 03:37:56PM +0900, Michael Paquier wrote:

For now, I think that just moving forward with 0001, and then revisit
0002 once the other 2PC patch is settled makes the most sense. On the
other thread, the current 2PC behavior can create silent data loss so
I would like to back-patch it, so that would be less work.

Are there any objections with this plan? If none, then I would like to
move on with 0001 as there is clearly a consensus to simplify the work
of translators and to clean up the error code paths for read() calls.
Let's sort of the rest after the 2PC code paths are addressed.

No objection here -- incremental progress is better than none.

--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#26Michael Paquier
michael@paquier.xyz
In reply to: Alvaro Herrera (#25)
Re: Fix some error handling for read() and errno

On Mon, Jul 16, 2018 at 04:04:12PM -0400, Alvaro Herrera wrote:

No objection here -- incremental progress is better than none.

Thanks. I have pushed 0001 now. I have found some more work which
could be done, for which I'll spawn a new thread.
--
Michael