Adding TCP_USER_TIMEOUT support for libpq/psqlodbc
Hello PostgreSQL Community!
Not long ago I ran into a situation where an ODBC/libpq client hangs when
certain network problems occur.
I had a discussion regarding this issue within pgsql-odbc@postgresql.org
and got some suggestions.
Here is this discussion:
/messages/by-id/OF33DF00A3.D6444835-ON432582C3.003EA7C5-432582C3.0045562B@iba.by
In short, the suggestion was to set the keepalive options through pqopt in the
ODBC configuration, for example:
pqopt = keepalives=1 keepalives_idle=5 keepalives_count=1
keepalives_interval=1
But under some circumstances this is not reliable: the connection can
lose its keepalive properties and remain hanging.
Here is a quote from our discussion:
Hmm it seems keepalive stops while waiting for ack.
Therefore it's a matter of retransmission control
You can use TCP_USER_TIMEOUT on linux but the option is not used in libpq.
In my opinion it makes sense to add support for the TCP_USER_TIMEOUT socket
option to libpq/psqlodbc connections.
The attachment contains a patch with the source code changes regarding
this issue.
In my implementation it can be configured with the new keepalives_user_timeout
option within the pqopt parameter.
Best regards,
Andrei Yahorau
Attachments:
0001_TCP_USER_TIMEOUT_libpq-int.patchapplication/octet-stream; name=0001_TCP_USER_TIMEOUT_libpq-int.patchDownload
--- ./old/postgres/src/interfaces/libpq/libpq-int.h 2018-07-19 19:18:19.028995039 +0300
+++ ./new/postgres/src/interfaces/libpq/libpq-int.h 2018-07-20 11:30:24.540155942 +0300
@@ -349,6 +349,11 @@
* retransmits */
char *keepalives_count; /* maximum number of TCP keepalive
* retransmits */
+ char *keepalives_user_timeout; /*
+ * maximum amount of time in milliseconds that transmitted
+ * data may remain unacknowledged before TCP will forcibly
+ * close the corresponding connection
+ */
char *scram_channel_binding; /* SCRAM channel binding type */
char *sslmode; /* SSL mode (require,prefer,allow,disable) */
char *sslcompression; /* SSL compression (0 or 1) */
0001_TCP_USER_TIMEOUT_fe-connect.patchapplication/octet-stream; name=0001_TCP_USER_TIMEOUT_fe-connect.patchDownload
--- ./old/postgres/src/interfaces/libpq/fe-connect.c 2018-07-19 19:18:14.436766664 +0300
+++ ./new/postgres/src/interfaces/libpq/fe-connect.c 2018-07-20 11:31:42.040486688 +0300
@@ -264,6 +264,10 @@
"TCP-Keepalives-Count", "", 10, /* strlen(INT32_MAX) == 10 */
offsetof(struct pg_conn, keepalives_count)},
+ {"keepalives_user_timeout", NULL, NULL, NULL,
+ "TCP-User-Timeout", "", 10, /* strlen(INT32_MAX) == 10 */
+ offsetof(struct pg_conn, keepalives_user_timeout)},
+
{"scram_channel_binding", NULL, DefaultSCRAMChannelBinding, NULL,
"SCRAM-Channel-Binding", "D",
21, /* sizeof("tls-server-end-point") == 21 */
@@ -1628,6 +1632,38 @@
return 1;
}
+
+/*
+ * Set the TCP_USER_TIMEOUT socket option on conn->sock.
+ *
+ * The value is taken from the keepalives_user_timeout connection option,
+ * which is expressed in milliseconds.  If the option was not given, the
+ * socket is left untouched.
+ *
+ * Returns 1 on success (including the "nothing to do" cases), or 0 after
+ * appending a message to conn->errorMessage if setsockopt() fails.
+ */
+static int
+setKeepalivesTcpUserTimeout(PGconn *conn)
+{
+	int			timeout;
+
+	if (conn->keepalives_user_timeout == NULL)
+		return 1;
+
+	/* atoi() yields 0 for non-numeric input; negative values are clamped */
+	timeout = atoi(conn->keepalives_user_timeout);
+	if (timeout < 0)
+		timeout = 0;
+
+	/* On platforms without TCP_USER_TIMEOUT the option is silently ignored */
+#ifdef TCP_USER_TIMEOUT
+	if (setsockopt(conn->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
+				   (char *) &timeout, sizeof(timeout)) < 0)
+	{
+		char		sebuf[256];
+
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("setsockopt(%s) failed: %s\n"),
+						  "TCP_USER_TIMEOUT",
+						  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
+		return 0;
+	}
+#endif
+
+	return 1;
+}
+
#else /* WIN32 */
#ifdef SIO_KEEPALIVE_VALS
/*
@@ -1751,7 +1787,7 @@
if (ret || !ch->addrlist)
appendPQExpBuffer(&conn->errorMessage,
libpq_gettext("could not parse network address \"%s\": %s\n"),
- ch->host, gai_strerror(ret));
+ ch->hostaddr, gai_strerror(ret));
break;
case CHT_UNIX_SOCKET:
@@ -2185,7 +2221,8 @@
}
else if (!setKeepalivesIdle(conn)
|| !setKeepalivesInterval(conn)
- || !setKeepalivesCount(conn))
+ || !setKeepalivesCount(conn)
+ || !setKeepalivesTcpUserTimeout(conn))
err = 1;
#else /* WIN32 */
#ifdef SIO_KEEPALIVE_VALS