>From 53c1c1c6aebbae799fe06570a98143c899b7c5b5 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sun, 28 Sep 2014 00:22:39 +0200
Subject: [PATCH 3/7] Process 'die' interrupts while reading/writing from the
 client socket.

Up to now it was impossible to terminate a backend that was trying to
send/recv data to/from the client when the socket's buffer was already
full/empty. While the send/recv calls itself might have gotten
interrupted by signals on some platforms, we just immediately retried.

That could lead to situations where a backend couldn't be terminated
after the client died without the connection being closed.

The problem was far more likely to be hit when sending data than when
reading, as when reading a command from the client and during
authentication, we already processed interrupt. That primarily left
COPY FROM STDIN as being problematic.

Change things so that that we process 'die' events immediately when
the appropriate signal arrives during both recv/send. We can't react
to query cancels at that point, because we might loose sync with the
client as we could be in the middle of writing a message.

We don't interrupt writes if the write buffer isn't full as that would
lead to far fewer error messages reaching clients.

Per discussion with Kyotaro HORIGUCHI and Heikki Linnakangas

Discussion: 20140927191243.GD5423@alap3.anarazel.de
---
 src/backend/libpq/be-secure-openssl.c | 24 +++++++++++----
 src/backend/libpq/be-secure.c         | 58 ++++++++++++++++++++++-------------
 src/backend/tcop/postgres.c           | 50 ++++++++++++++++++++++++++++--
 src/include/tcop/tcopprot.h           |  3 +-
 4 files changed, 105 insertions(+), 30 deletions(-)

diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index e4c1a25..08e3422 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -556,7 +556,7 @@ rloop:
 			if (latchret & WL_LATCH_SET)
 			{
 				ResetLatch(MyLatch);
-				ProcessClientReadInterrupt();  /* preserves errno */
+				ProcessClientReadInterrupt(true);  /* preserves errno */
 			}
 			goto rloop;
 		case SSL_ERROR_SYSCALL:
@@ -598,6 +598,7 @@ be_tls_write(Port *port, void *ptr, size_t len)
 	ssize_t		n;
 	int			err;
 	int			waitfor;
+	int			latchret;
 
 	/*
 	 * If SSL renegotiations are enabled and we're getting close to the
@@ -662,16 +663,27 @@ wloop:
 		case SSL_ERROR_WANT_READ:
 		case SSL_ERROR_WANT_WRITE:
 
+			waitfor = WL_LATCH_SET;
+
 			if (err == SSL_ERROR_WANT_READ)
-				waitfor = WL_SOCKET_READABLE;
+				waitfor |= WL_SOCKET_READABLE;
 			else
-				waitfor = WL_SOCKET_WRITEABLE;
+				waitfor |= WL_SOCKET_WRITEABLE;
+
+			latchret = WaitLatchOrSocket(MyLatch, waitfor, port->sock, 0);
 
-			WaitLatchOrSocket(MyLatch, waitfor, port->sock, 0);
 			/*
-			 * XXX: We'll, at some later point, likely want to add interrupt
-			 * processing here.
+			 * Check for interrupts here, in addition to secure_write(),
+			 * because an interrupted write in secure_raw_write() will return
+			 * here, and we cannot return to secure_write() until we've
+			 * written something.
 			 */
+			if (latchret & WL_LATCH_SET)
+			{
+				ResetLatch(MyLatch);
+				ProcessClientWriteInterrupt(true); /* preserves errno */
+			}
+
 			goto wloop;
 		case SSL_ERROR_SYSCALL:
 			/* leave it to caller to ereport the value of errno */
diff --git a/src/backend/libpq/be-secure.c b/src/backend/libpq/be-secure.c
index b90ab0e..c2c1842 100644
--- a/src/backend/libpq/be-secure.c
+++ b/src/backend/libpq/be-secure.c
@@ -140,14 +140,27 @@ retry:
 		n = secure_raw_read(port, ptr, len);
 	}
 
-	/* Process interrupts that happened while (or before) receiving. */
-	ProcessClientReadInterrupt(); /* preserves errno */
-
 	/* retry after processing interrupts */
 	if (n < 0 && errno == EINTR)
 	{
+		/*
+		 * We tried to read data, the socket was empty, and we were
+		 * interrupted while waiting for readability. We only process
+		 * interrupts if we got interrupted while reading and when in blocking
+		 * mode. In other cases it's better to allow the interrupts to be
+		 * handled at higher layers.
+		 */
+		ProcessClientReadInterrupt(!port->noblock); /* preserves errno */
 		goto retry;
 	}
+
+	/*
+	 * Process interrupts that happened while (or before) receiving. Note that
+	 * we signal that we're not blocking, which will prevent some types of
+	 * interrupts from being processed.
+	 */
+	ProcessClientReadInterrupt(false);
+
 	return n;
 }
 
@@ -224,18 +237,17 @@ retry:
 		n = secure_raw_write(port, ptr, len);
 	}
 
-	/*
-	 * XXX: We'll, at some later point, likely want to add interrupt
-	 * processing here.
-	 */
-
-	/*
-	 * Retry after processing interrupts. This can be triggered even though we
-	 * don't check for latch set's during writing yet, because SSL
-	 * renegotiations might have required reading from the socket.
-	 */
+	/* retry after processing interrupts */
 	if (n < 0 && errno == EINTR)
 	{
+		/*
+		 * We tried to send data, the socket was full, and we were interrupted
+		 * while waiting for writability. We only process interrupts if we got
+		 * interrupted while writing and when in blocking mode. In other cases
+		 * it's better to allow the interrupts to be handled at higher layers.
+		 */
+		ProcessClientWriteInterrupt(!port->noblock);
+
 		goto retry;
 	}
 
@@ -262,17 +274,21 @@ wloop:
 		int		w;
 		int		save_errno = errno;
 
-		/*
-		 * We probably want to check for latches being set at some point
-		 * here. That'd allow us to handle interrupts while blocked on
-		 * writes. If set we'd not retry directly, but return. That way we
-		 * don't do anything while (possibly) inside a ssl library.
-		 */
 		w = WaitLatchOrSocket(MyLatch,
-							  WL_SOCKET_WRITEABLE,
+							  WL_LATCH_SET | WL_SOCKET_WRITEABLE,
 							  port->sock, 0);
 
-		if (w & WL_SOCKET_WRITEABLE)
+		if (w & WL_LATCH_SET)
+		{
+			ResetLatch(MyLatch);
+			/*
+			 * Force a return, so interrupts can be processed when not
+			 * (possibly) underneath a ssl library.
+			 */
+			errno = EINTR;
+			return -1;
+		}
+		else if (w & WL_SOCKET_WRITEABLE)
 		{
 			goto wloop;
 		}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1f26c6c..060f62a 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -318,7 +318,7 @@ interactive_getc(void)
 
 	c = getc(stdin);
 
-	ProcessClientReadInterrupt();
+	ProcessClientReadInterrupt(true);
 
 	return c;
 }
@@ -529,7 +529,7 @@ ReadCommand(StringInfo inBuf)
  * Must preserve errno!
  */
 void
-ProcessClientReadInterrupt(void)
+ProcessClientReadInterrupt(bool blocked)
 {
 	int			save_errno = errno;
 
@@ -546,10 +546,56 @@ ProcessClientReadInterrupt(void)
 		if (notifyInterruptPending)
 			ProcessNotifyInterrupt();
 	}
+	else if (ProcDiePending && blocked)
+	{
+		/*
+		 * We're dying. It's safe (and sane) to handle that now.
+		 */
+		CHECK_FOR_INTERRUPTS();
+	}
 
 	errno = save_errno;
 }
 
+/*
+ * ProcessClientWriteInterrupt() - Process interrupts specific to client writes
+ *
+ * This is called just after low-level writes. That might be after the read
+ * finished successfully, or it was interrupted via interrupt. 'blocked' tells
+ * us whether the
+ *
+ * Must preserve errno!
+ */
+void
+ProcessClientWriteInterrupt(bool blocked)
+{
+	int			save_errno = errno;
+
+	Assert(InterruptHoldoffCount == 0 && CritSectionCount == 0);
+
+	/*
+	 * We only want to process the interrupt here if socket writes are
+	 * blocking to increase the chance to get an error message to the
+	 * client. If we're not blocked there'll soon be a
+	 * CHECK_FOR_INTERRUPTS(). But if we're blocked we'll never get out of
+	 * that situation if the client has died.
+	 */
+	if (ProcDiePending && blocked)
+	{
+		/*
+		 * We're dying. It's safe (and sane) to handle that now. But we don't
+		 * want to send the client the error message as that a) would possibly
+		 * block again b) would possibly lead to sending an error message to
+		 * the client, while we already started to send something else.
+		 */
+		if (whereToSendOutput == DestRemote)
+			whereToSendOutput = DestNone;
+
+		CHECK_FOR_INTERRUPTS();
+	}
+
+	errno = save_errno;
+}
 
 /*
  * Do raw parsing (only).
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index fe8c725..3e17770 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -67,7 +67,8 @@ extern void StatementCancelHandler(SIGNAL_ARGS);
 extern void FloatExceptionHandler(SIGNAL_ARGS) __attribute__((noreturn));
 extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from SIGUSR1
 																 * handler */
-extern void ProcessClientReadInterrupt(void);
+extern void ProcessClientReadInterrupt(bool blocked);
+extern void ProcessClientWriteInterrupt(bool blocked);
 
 extern void process_postgres_switches(int argc, char *argv[],
 						  GucContext ctx, const char **dbname);
-- 
2.0.0.rc2.4.g1dc51c6.dirty

