Adding TCP_USER_TIMEOUT support for libpq/psqlodbc

Started by Noname over 7 years ago · 1 message
#1Noname
AYahorau@ibagroup.eu
2 attachment(s)

Hello PostgreSQL Community!

Not long ago I ran into a situation where an ODBC/libpq client hangs when
certain network problems occur.
I discussed this issue on the pgsql-odbc@postgresql.org list
and got some suggestions.
Here is that discussion:
/messages/by-id/OF33DF00A3.D6444835-ON432582C3.003EA7C5-432582C3.0045562B@iba.by

In short, the suggestion was to pass libpq keepalive options through pqopt
in the ODBC configuration, for example:
pqopt = keepalives=1 keepalives_idle=5 keepalives_count=1
keepalives_interval=1

However, under some circumstances this is not reliable: the connection can
lose its keepalive behavior and remain hung.

Here is a quote from our discussion:
Hmm it seems keepalive stops while waiting for ack.
Therefore it's a matter of retransmission control
You can use TCP_USER_TIMEOUT on linux but the option is not used in libpq.

In my opinion it makes sense to add support for the TCP_USER_TIMEOUT socket
option to libpq/psqlodbc connections.
The attachments contain patches with the corresponding source code
changes.
In my implementation the timeout is configured through a new
keepalives_user_timeout option within the pqopt parameter.

Best regards,
Andrei Yahorau

Attachments:

0001_TCP_USER_TIMEOUT_libpq-int.patchapplication/octet-stream; name=0001_TCP_USER_TIMEOUT_libpq-int.patchDownload
--- ./old/postgres/src/interfaces/libpq/libpq-int.h	2018-07-19 19:18:19.028995039 +0300
+++ ./new/postgres/src/interfaces/libpq/libpq-int.h	2018-07-20 11:30:24.540155942 +0300
@@ -349,6 +349,11 @@
 										 * retransmits */
 	char	   *keepalives_count;	/* maximum number of TCP keepalive
 									 * retransmits */
+	char	   *keepalives_user_timeout;		 /*
+								 * maximum amount of time in milliseconds that transmitted
+								 * data may remain unacknowledged before TCP will forcibly
+								 * close the corresponding connection
+								 */
 	char	   *scram_channel_binding;	/* SCRAM channel binding type */
 	char	   *sslmode;		/* SSL mode (require,prefer,allow,disable) */
 	char	   *sslcompression; /* SSL compression (0 or 1) */
0001_TCP_USER_TIMEOUT_fe-connect.patchapplication/octet-stream; name=0001_TCP_USER_TIMEOUT_fe-connect.patchDownload
--- ./old/postgres/src/interfaces/libpq/fe-connect.c	2018-07-19 19:18:14.436766664 +0300
+++ ./new/postgres/src/interfaces/libpq/fe-connect.c	2018-07-20 11:31:42.040486688 +0300
@@ -264,6 +264,10 @@
 		"TCP-Keepalives-Count", "", 10, /* strlen(INT32_MAX) == 10 */
 	offsetof(struct pg_conn, keepalives_count)},
 
+	{"keepalives_user_timeout", NULL, NULL, NULL,
+		"TCP-User-Timeout", "", 10, /* strlen(INT32_MAX) == 10 */
+	offsetof(struct pg_conn, keepalives_user_timeout)},
+
 	{"scram_channel_binding", NULL, DefaultSCRAMChannelBinding, NULL,
 		"SCRAM-Channel-Binding", "D",
 		21,						/* sizeof("tls-server-end-point") == 21 */
@@ -1628,6 +1632,38 @@
 
 	return 1;
 }
+
+/*
+ * Set TCP_USER_TIMEOUT (ms) from keepalives_user_timeout; 1 = ok, 0 = error.
+ */
+static int
+setKeepalivesTcpUserTimeout(PGconn *conn)
+{
+	int timeout;
+
+	if (conn->keepalives_user_timeout == NULL)
+		return 1;				/* option not set: nothing to do */
+
+	timeout = atoi(conn->keepalives_user_timeout);
+	if (timeout < 0)
+		timeout = 0;			/* clamp negatives; 0 is passed through as-is */
+
+#ifdef TCP_USER_TIMEOUT			/* compiled out where the macro is undefined */
+	if (setsockopt(conn->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
+				   (char *) &timeout, sizeof(timeout)) < 0)
+	{
+		char sebuf[256];
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("setsockopt(%s) failed: %s\n"),
+						  "TCP_USER_TIMEOUT",
+						  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
+		return 0;
+	}
+#endif
+
+	return 1;
+}
+
 #else							/* WIN32 */
 #ifdef SIO_KEEPALIVE_VALS
 /*
@@ -1751,7 +1787,7 @@
 				if (ret || !ch->addrlist)
 					appendPQExpBuffer(&conn->errorMessage,
 									  libpq_gettext("could not parse network address \"%s\": %s\n"),
-									  ch->host, gai_strerror(ret));
+									  ch->hostaddr, gai_strerror(ret));
 				break;
 
 			case CHT_UNIX_SOCKET:
@@ -2185,7 +2221,8 @@
 						}
 						else if (!setKeepalivesIdle(conn)
 								 || !setKeepalivesInterval(conn)
-								 || !setKeepalivesCount(conn))
+								 || !setKeepalivesCount(conn)
+								 || !setKeepalivesTcpUserTimeout(conn))
 							err = 1;
 #else							/* WIN32 */
 #ifdef SIO_KEEPALIVE_VALS