pgbench internal contention

Started by Robert Haasover 14 years ago16 messages
#1Robert Haas
robertmhaas@gmail.com
1 attachment(s)

On machines with lots of CPU cores, pgbench can start eating up a lot
of system time. Investigation reveals that the problem is with
random(), which glibc implements like this:

long int
__random ()
{
int32_t retval;
__libc_lock_lock (lock);
(void) __random_r (&unsafe_state, &retval);
__libc_lock_unlock (lock);
return retval;
}
weak_alias (__random, random)

Rather obviously, if you're running enough pgbench threads, you're
going to have a pretty ugly point of contention there. On the 32-core
machine provided by Nate Boley, with my usual 5-minute SELECT-only
test, lazy-vxid and sinval-fastmessages applied, and scale factor 100,
"time" shows that pgbench uses almost as much system time as user
time:

$ time pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 55319555
tps = 184396.016257 (including connections establishing)
tps = 184410.926840 (excluding connections establishing)

real 5m0.019s
user 21m10.100s
sys 17m45.480s

I patched it to use random_r() - the patch is attached - and here are
the (rather gratifying) results of that test:

$ time ./pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 71851589
tps = 239503.585813 (including connections establishing)
tps = 239521.816698 (excluding connections establishing)

real 5m0.016s
user 20m40.880s
sys 9m25.930s

Since a client-limited benchmark isn't very interesting, I think this
change makes sense. Thoughts? Objections? Coding style
improvements?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

random_r.patchapplication/octet-stream; name=random_r.patchDownload
diff --git a/configure b/configure
index 0e537d9..f11cccf 100755
--- a/configure
+++ b/configure
@@ -18923,7 +18923,8 @@ fi
 
 
 
-for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l
+
+for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat random_r readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l
 do
 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
diff --git a/configure.in b/configure.in
index 150f9a5..fc1e73a 100644
--- a/configure.in
+++ b/configure.in
@@ -1192,7 +1192,7 @@ PGAC_VAR_INT_TIMEZONE
 AC_FUNC_ACCEPT_ARGTYPES
 PGAC_FUNC_GETTIMEOFDAY_1ARG
 
-AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat random_r readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 9892f4c..751deb1 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -104,6 +104,8 @@ extern int	optind;
 
 #define DEFAULT_NXACTS	10		/* default nxacts */
 
+#define RAND_STATE_BYTES 128	/* glibc supports powers of 2, 8 .. 256 */
+
 int			nxacts = 0;			/* number of transactions per client */
 int			duration = 0;		/* duration in seconds */
 
@@ -198,6 +200,10 @@ typedef struct
 	instr_time	start_time;		/* thread start time */
 	instr_time *exec_elapsed;	/* time spent executing cmds (per Command) */
 	int		   *exec_count;		/* number of cmd executions (per Command) */
+#ifdef HAVE_RANDOM_R
+	char		randstate[RAND_STATE_BYTES];	/* random state */
+	struct random_data	randbuf;				/* random data buffer */
+#endif
 } TState;
 
 #define INVALID_THREAD		((pthread_t) 0)
@@ -380,13 +386,21 @@ usage(const char *progname)
 
 /* random number generator: uniform distribution from min to max inclusive */
 static int
-getrand(int min, int max)
+getrand(TState *thread, int min, int max)
 {
+#ifdef HAVE_RANDOM_R
+	int32_t	raw;
+	random_r(&thread->randbuf, &raw);
+#else
+	long	raw;
+	raw = random();
+#endif
+
 	/*
 	 * Odd coding is so that min and max have approximately the same chance of
 	 * being selected as do numbers between them.
 	 */
-	return min + (int) (((max - min + 1) * (double) random()) / (MAX_RANDOM_VALUE + 1.0));
+	return min + (int) (((max - min + 1) * (double) raw) / (MAX_RANDOM_VALUE + 1.0));
 }
 
 /* call PQexec() and exit() on failure */
@@ -901,7 +915,7 @@ top:
 		if (commands[st->state] == NULL)
 		{
 			st->state = 0;
-			st->use_file = getrand(0, num_files - 1);
+			st->use_file = getrand(thread, 0, num_files - 1);
 			commands = sql_files[st->use_file];
 		}
 	}
@@ -1068,9 +1082,9 @@ top:
 			}
 
 #ifdef DEBUG
-			printf("min: %d max: %d random: %d\n", min, max, getrand(min, max));
+			printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max));
 #endif
-			snprintf(res, sizeof(res), "%d", getrand(min, max));
+			snprintf(res, sizeof(res), "%d", getrand(thread, min, max));
 
 			if (!putVariable(st, argv[0], argv[1], res))
 			{
@@ -2242,6 +2256,15 @@ main(int argc, char **argv)
 		thread->tid = i;
 		thread->state = &state[nclients / nthreads * i];
 		thread->nstate = nclients / nthreads;
+#ifdef HAVE_RANDOM_R
+		/* glibc gets confused if we don't zero these */
+		memset(thread->randstate, 0, RAND_STATE_BYTES);
+		memset(&thread->randbuf, 0, sizeof(thread->randbuf));
+		initstate_r(((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)) +
+					((unsigned int) i),
+					thread->randstate, RAND_STATE_BYTES,
+					&thread->randbuf);
+#endif
 
 		if (is_latencies)
 		{
@@ -2384,7 +2407,7 @@ threadRun(void *arg)
 		Command   **commands = sql_files[st->use_file];
 		int			prev_ecnt = st->ecnt;
 
-		st->use_file = getrand(0, num_files - 1);
+		st->use_file = getrand(thread, 0, num_files - 1);
 		if (!doCustom(thread, st, &result->conn_time, logfile))
 			remains--;			/* I've aborted */
 
@@ -2590,9 +2613,11 @@ pthread_create(pthread_t *thread,
 	 * output of a single random() sequence, which should be okay for our
 	 * purposes.)
 	 */
+#ifndef HAVE_RANDOM_R
 	INSTR_TIME_SET_CURRENT(start_time);
 	srandom(((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)) +
 			((unsigned int) getpid()));
+#endif
 
 	ret = start_routine(arg);
 	write(th->pipes[1], ret, sizeof(TResult));
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 24b951e..8e87667 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -405,6 +405,9 @@
 /* Define to 1 if you have the `random' function. */
 #undef HAVE_RANDOM
 
+/* Define to 1 if you have the `random_r' function. */
+#undef HAVE_RANDOM_R
+
 /* Define to 1 if you have the <readline.h> header file. */
 #undef HAVE_READLINE_H
 
#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#1)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

On machines with lots of CPU cores, pgbench can start eating up a lot
of system time. Investigation reveals that the problem is with
random(),

Interesting.

I patched it to use random_r() - the patch is attached - and here are
the (rather gratifying) results of that test:
Since a client-limited benchmark isn't very interesting, I think this
change makes sense. Thoughts? Objections?

Portability, or rather lack of it. What about using erand48, which we
already have a dependency on (and substitute code for)?

regards, tom lane

#3Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#2)
Re: pgbench internal contention

On Fri, Jul 29, 2011 at 5:25 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Robert Haas <robertmhaas@gmail.com> writes:

On machines with lots of CPU cores, pgbench can start eating up a lot
of system time.  Investigation reveals that the problem is with
random(),

Interesting.

I patched it to use random_r() - the patch is attached - and here are
the (rather gratifying) results of that test:
Since a client-limited benchmark isn't very interesting, I think this
change makes sense.  Thoughts?  Objections?

Portability, or rather lack of it.  What about using erand48, which we
already have a dependency on (and substitute code for)?

Neither our implementation nor glibc's appears to be thread-safe, and
erand48() is deprecated according to my Linux man page:

NOTES
These functions are declared obsolete by SVID 3, which states that
rand(3) should be used instead.

glibc provides erand48_r(), and I suppose we could kludge up something
similar out of what's already in src/port?

This is also not exactly the world's most sophisticated algorithm, but
perhaps for pgbench that doesn't matter.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#4Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#3)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

On Fri, Jul 29, 2011 at 5:25 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Portability, or rather lack of it. �What about using erand48, which we
already have a dependency on (and substitute code for)?

Neither our implementation nor glibc's appears to be thread-safe,

I think you're confusing srand48 with erand48. The latter relies on a
caller-supplied state value, so if it's not thread-safe the caller has
only itself to blame.

regards, tom lane

#5Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#4)
Re: pgbench internal contention

On Sat, Jul 30, 2011 at 2:32 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Robert Haas <robertmhaas@gmail.com> writes:

On Fri, Jul 29, 2011 at 5:25 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Portability, or rather lack of it.  What about using erand48, which we
already have a dependency on (and substitute code for)?

Neither our implementation nor glibc's appears to be thread-safe,

I think you're confusing srand48 with erand48.  The latter relies on a
caller-supplied state value, so if it's not thread-safe the caller has
only itself to blame.

I may be confused, but I'm not mixing it up with srand48. On my
Fedora 12 VM, the man page says for erand48_r says, in relevant part:

SYNOPSIS
#include <stdlib.h>

int erand48_r(unsigned short xsubi[3],
struct drand48_data *buffer, double *result);

DESCRIPTION
These functions are the reentrant analogs of the functions described in
drand48(3). Instead of modifying the global random generator state,
they use the supplied data buffer.

And the glibc implementation of erand48 is this:

double
erand48 (xsubi)
unsigned short int xsubi[3];
{
double result;

(void) __erand48_r (xsubi, &__libc_drand48_data, &result);

return result;
}

On second look, I think our version is probably safe in this regard
since it appears modify only the state passed in and not anything
else.

*pokes a little more*

If I'm reading the code right, it only modifies __libc_drand48_data on
first call, so as long as we called erand48() at least once before
spawning the child threads, it would probably work. That seems pretty
fragile in the face of the fact that they explicitly state that
they're modifying the global random generator state and that you
should use erand48_r() if you want reentrant behavior. So I think if
we're going to go the erand48() route we probably ought to force
pgbench to always use our version rather than any OS-supplied version.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#6Andy Colson
andy@squeakycode.net
In reply to: Robert Haas (#1)
Re: pgbench internal contention

On 07/29/2011 04:00 PM, Robert Haas wrote:

On machines with lots of CPU cores, pgbench can start eating up a lot
of system time. Investigation reveals that the problem is with
random(), which glibc implements like this:

long int
__random ()
{
int32_t retval;
__libc_lock_lock (lock);
(void) __random_r (&unsafe_state,&retval);
__libc_lock_unlock (lock);
return retval;
}
weak_alias (__random, random)

Rather obviously, if you're running enough pgbench threads, you're
going to have a pretty ugly point of contention there. On the 32-core
machine provided by Nate Boley, with my usual 5-minute SELECT-only
test, lazy-vxid and sinval-fastmessages applied, and scale factor 100,
"time" shows that pgbench uses almost as much system time as user
time:

$ time pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 55319555
tps = 184396.016257 (including connections establishing)
tps = 184410.926840 (excluding connections establishing)

real 5m0.019s
user 21m10.100s
sys 17m45.480s

I patched it to use random_r() - the patch is attached - and here are
the (rather gratifying) results of that test:

$ time ./pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 71851589
tps = 239503.585813 (including connections establishing)
tps = 239521.816698 (excluding connections establishing)

real 5m0.016s
user 20m40.880s
sys 9m25.930s

Since a client-limited benchmark isn't very interesting, I think this
change makes sense. Thoughts? Objections? Coding style
improvements?

How much randomness do we really need for test data. What if it where changed to more of a random starting point and then autoinc'd after that. Or if there were two func's, a rand() and a next(). If your test really needs randomness use rand(), otherwise use next(), it would be way faster, and you dont really care what the number is anyway.

-Andy

#7Robert Haas
robertmhaas@gmail.com
In reply to: Andy Colson (#6)
Re: pgbench internal contention

On Jul 30, 2011, at 9:40 AM, Andy Colson <andy@squeakycode.net> wrote:

On 07/29/2011 04:00 PM, Robert Haas wrote:

On machines with lots of CPU cores, pgbench can start eating up a lot
of system time. Investigation reveals that the problem is with
random(), which glibc implements like this:

long int
__random ()
{
int32_t retval;
__libc_lock_lock (lock);
(void) __random_r (&unsafe_state,&retval);
__libc_lock_unlock (lock);
return retval;
}
weak_alias (__random, random)

Rather obviously, if you're running enough pgbench threads, you're
going to have a pretty ugly point of contention there. On the 32-core
machine provided by Nate Boley, with my usual 5-minute SELECT-only
test, lazy-vxid and sinval-fastmessages applied, and scale factor 100,
"time" shows that pgbench uses almost as much system time as user
time:

$ time pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 55319555
tps = 184396.016257 (including connections establishing)
tps = 184410.926840 (excluding connections establishing)

real 5m0.019s
user 21m10.100s
sys 17m45.480s

I patched it to use random_r() - the patch is attached - and here are
the (rather gratifying) results of that test:

$ time ./pgbench -n -S -T 300 -c 64 -j 64
transaction type: SELECT only
scaling factor: 100
query mode: simple
number of clients: 64
number of threads: 64
duration: 300 s
number of transactions actually processed: 71851589
tps = 239503.585813 (including connections establishing)
tps = 239521.816698 (excluding connections establishing)

real 5m0.016s
user 20m40.880s
sys 9m25.930s

Since a client-limited benchmark isn't very interesting, I think this
change makes sense. Thoughts? Objections? Coding style
improvements?

How much randomness do we really need for test data. What if it where changed to more of a random starting point and then autoinc'd after that. Or if there were two func's, a rand() and a next(). If your test really needs randomness use rand(), otherwise use next(), it would be way faster, and you dont really care what the number is anyway.

Well, I think you need at least pseudo-randomness for pgbench - reading the table in sequential order is not going to perform the same as doing random fetches against it.

...Robert

#8Robert Haas
robertmhaas@gmail.com
In reply to: Robert Haas (#5)
1 attachment(s)
Re: pgbench internal contention

On Sat, Jul 30, 2011 at 9:08 AM, Robert Haas <robertmhaas@gmail.com> wrote:

If I'm reading the code right, it only modifies __libc_drand48_data on
first call, so as long as we called erand48() at least once before
spawning the child threads, it would probably work.  That seems pretty
fragile in the face of the fact that they explicitly state that
they're modifying the global random generator state and that you
should use erand48_r() if you want reentrant behavior.  So I think if
we're going to go the erand48() route we probably ought to force
pgbench to always use our version rather than any OS-supplied version.

Attached is a try at that approach. Performance benefits are similar
to before. Same test case as in my OP on this thread, alternating
runs without and with this patch:

tps = 199133.418419 (including connections establishing)
real 5m0.017s
user 23m42.170s
sys 18m46.270s

tps = 226202.289151 (including connections establishing)
real 5m0.018s
user 22m7.520s
sys 9m54.570s

tps = 191144.247489 (including connections establishing)
real 5m0.025s
user 23m35.200s
sys 17m19.070s

tps = 226353.187955 (including connections establishing)
real 5m0.017s
user 21m42.300s
sys 10m9.820s

tps = 189602.248908 (including connections establishing)
real 5m0.044s
user 23m24.060s
sys 17m1.050s

tps = 224521.459164 (including connections establishing)
real 5m0.017s
user 22m9.620s
sys 10m22.590s

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

Attachments:

pgbench-erand48.patchapplication/octet-stream; name=pgbench-erand48.patchDownload
diff --git a/configure b/configure
index 0e537d9..edae78d 100755
--- a/configure
+++ b/configure
@@ -18923,7 +18923,8 @@ fi
 
 
 
-for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l
+
+for ac_func in cbrt dlopen erand48 fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l
 do
 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -20493,8 +20494,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
 
 
-
-for ac_func in crypt erand48 getopt getrusage inet_aton random rint srandom strdup strerror strlcat strlcpy strtol strtoul
+for ac_func in crypt getopt getrusage inet_aton random rint srandom strdup strerror strlcat strlcpy strtol strtoul
 do
 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
diff --git a/configure.in b/configure.in
index 150f9a5..e761aa0 100644
--- a/configure.in
+++ b/configure.in
@@ -1192,7 +1192,7 @@ PGAC_VAR_INT_TIMEZONE
 AC_FUNC_ACCEPT_ARGTYPES
 PGAC_FUNC_GETTIMEOFDAY_1ARG
 
-AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen erand48 fcvt fdatasync getifaddrs getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
@@ -1311,7 +1311,7 @@ fi
 pgac_save_LIBS="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-AC_REPLACE_FUNCS([crypt erand48 getopt getrusage inet_aton random rint srandom strdup strerror strlcat strlcpy strtol strtoul])
+AC_REPLACE_FUNCS([crypt getopt getrusage inet_aton random rint srandom strdup strerror strlcat strlcpy strtol strtoul])
 
 case $host_os in
 
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 9d57436..f563a72 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -198,6 +198,7 @@ typedef struct
 	instr_time	start_time;		/* thread start time */
 	instr_time *exec_elapsed;	/* time spent executing cmds (per Command) */
 	int		   *exec_count;		/* number of cmd executions (per Command) */
+	unsigned short random_seed[3];	/* random seed */
 } TState;
 
 #define INVALID_THREAD		((pthread_t) 0)
@@ -380,13 +381,16 @@ usage(const char *progname)
 
 /* random number generator: uniform distribution from min to max inclusive */
 static int
-getrand(int min, int max)
+getrand(TState *thread, int min, int max)
 {
 	/*
 	 * Odd coding is so that min and max have approximately the same chance of
 	 * being selected as do numbers between them.
+	 *
+	 * Our version of erand48 is thread-safe, so we force the use of it
+	 * even if an OS-supplied version is also available.
 	 */
-	return min + (int) (((max - min + 1) * (double) random()) / (MAX_RANDOM_VALUE + 1.0));
+	return min + (int) (((max - min + 1) * pg_erand48(thread->random_seed)) / (MAX_RANDOM_VALUE + 1.0));
 }
 
 /* call PQexec() and exit() on failure */
@@ -901,7 +905,7 @@ top:
 		if (commands[st->state] == NULL)
 		{
 			st->state = 0;
-			st->use_file = getrand(0, num_files - 1);
+			st->use_file = getrand(thread, 0, num_files - 1);
 			commands = sql_files[st->use_file];
 		}
 	}
@@ -1068,9 +1072,9 @@ top:
 			}
 
 #ifdef DEBUG
-			printf("min: %d max: %d random: %d\n", min, max, getrand(min, max));
+			printf("min: %d max: %d random: %d\n", min, max, getrand(thread, min, max));
 #endif
-			snprintf(res, sizeof(res), "%d", getrand(min, max));
+			snprintf(res, sizeof(res), "%d", getrand(thread, min, max));
 
 			if (!putVariable(st, argv[0], argv[1], res))
 			{
@@ -2242,6 +2246,9 @@ main(int argc, char **argv)
 		thread->tid = i;
 		thread->state = &state[nclients / nthreads * i];
 		thread->nstate = nclients / nthreads;
+		thread->random_seed[0] = random();
+		thread->random_seed[1] = random();
+		thread->random_seed[2] = random();
 
 		if (is_latencies)
 		{
@@ -2384,7 +2391,7 @@ threadRun(void *arg)
 		Command   **commands = sql_files[st->use_file];
 		int			prev_ecnt = st->ecnt;
 
-		st->use_file = getrand(0, num_files - 1);
+		st->use_file = getrand(thread, 0, num_files - 1);
 		if (!doCustom(thread, st, &result->conn_time, logfile))
 			remains--;			/* I've aborted */
 
diff --git a/src/include/port.h b/src/include/port.h
index 2cab65f..b0676cb 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -380,11 +380,14 @@ extern off_t ftello(FILE *stream);
 #endif
 #endif
 
+extern double pg_erand48(unsigned short xseed[3]);
+extern long pg_lrand48(void);
+extern void pg_srand48(long seed);
+
 #ifndef HAVE_ERAND48
-/* we assume all of these are present or missing together */
-extern double erand48(unsigned short xseed[3]);
-extern long lrand48(void);
-extern void srand48(long seed);
+#define erand48(x) pg_erand48(x)
+#define lrand48() pg_lrand48()
+#define srand48(x) pg_srand48(x)
 #endif
 
 #ifndef HAVE_FSEEKO
diff --git a/src/port/Makefile b/src/port/Makefile
index 60295dc..ffbe95e 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -30,8 +30,8 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
 LIBS += $(PTHREAD_LIBS)
 
-OBJS = $(LIBOBJS) chklocale.o dirmod.o exec.o inet_net_ntop.o noblock.o \
-	path.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o \
+OBJS = $(LIBOBJS) chklocale.o dirmod.o erand48.o exec.o inet_net_ntop.o \
+	noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o \
 	qsort.o qsort_arg.o sprompt.o thread.o
 
 # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
diff --git a/src/port/erand48.c b/src/port/erand48.c
index 64db7a5..3efbb3f 100644
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -70,9 +70,9 @@ _dorand48(unsigned short xseed[3])
 	xseed[2] = (unsigned short) accu;
 }
 
-
+/* pgbench relies on this being thread-safe */
 double
-erand48(unsigned short xseed[3])
+pg_erand48(unsigned short xseed[3])
 {
 	_dorand48(xseed);
 	return ldexp((double) xseed[0], -48) +
@@ -81,14 +81,14 @@ erand48(unsigned short xseed[3])
 }
 
 long
-lrand48(void)
+pg_lrand48(void)
 {
 	_dorand48(_rand48_seed);
 	return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1);
 }
 
 void
-srand48(long seed)
+pg_srand48(long seed)
 {
 	_rand48_seed[0] = RAND48_SEED_0;
 	_rand48_seed[1] = (unsigned short) seed;
#9Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#5)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

If I'm reading the code right, it only modifies __libc_drand48_data on
first call, so as long as we called erand48() at least once before
spawning the child threads, it would probably work. That seems pretty
fragile in the face of the fact that they explicitly state that
they're modifying the global random generator state and that you
should use erand48_r() if you want reentrant behavior. So I think if
we're going to go the erand48() route we probably ought to force
pgbench to always use our version rather than any OS-supplied version.

Or add erand48_r() to our version and use that.

regards, tom lane

#10Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#8)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

On Sat, Jul 30, 2011 at 9:08 AM, Robert Haas <robertmhaas@gmail.com> wrote:

If I'm reading the code right, it only modifies __libc_drand48_data on
first call, so as long as we called erand48() at least once before
spawning the child threads, it would probably work. �That seems pretty
fragile in the face of the fact that they explicitly state that
they're modifying the global random generator state and that you
should use erand48_r() if you want reentrant behavior. �So I think if
we're going to go the erand48() route we probably ought to force
pgbench to always use our version rather than any OS-supplied version.

Attached is a try at that approach.

I don't find this to be a particularly safe idea:

#ifndef HAVE_ERAND48
-/* we assume all of these are present or missing together */
-extern double erand48(unsigned short xseed[3]);
-extern long lrand48(void);
-extern void srand48(long seed);
+#define erand48(x) pg_erand48(x)
+#define lrand48() pg_lrand48()
+#define srand48(x) pg_srand48(x)
#endif

See our recent trials with python headers for an example of why this
might cause problems on some platforms. For that matter, I believe
<stdlib.h> would be within its rights to be defining these names as
macros already.

If you want erand48_r, best to provide that API, not kluge up some
other functions.

regards, tom lane

#11Greg Smith
greg@2ndQuadrant.com
In reply to: Robert Haas (#5)
Re: pgbench internal contention

On 07/30/2011 09:08 AM, Robert Haas wrote:

If I'm reading the code right, it only modifies __libc_drand48_data on
first call, so as long as we called erand48() at least once before
spawning the child threads, it would probably work. That seems pretty
fragile in the face of the fact that they explicitly state that
they're modifying the global random generator state and that you
should use erand48_r() if you want reentrant behavior. So I think if
we're going to go the erand48() route we probably ought to force
pgbench to always use our version rather than any OS-supplied version.

By the way: the landmines in this whole area are what sunk the attempt
to switch pgbench over to using 64 bits for the accounts table back in
February. See the last few messages of
http://postgresql.1045698.n5.nabble.com/Re-PERFORM-pgbench-to-the-MAXINT-td3337374.html
to revisit. I think you've retraced all of that now, but double
checking your plan against things like the AIX specific weirdness I
pointed out there may be useful.

--
Greg Smith 2ndQuadrant US greg@2ndQuadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.us

#12Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#10)
Re: pgbench internal contention

On Mon, Aug 1, 2011 at 11:33 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Attached is a try at that approach.

I don't find this to be a particularly safe idea:

 #ifndef HAVE_ERAND48
-/* we assume all of these are present or missing together */
-extern double erand48(unsigned short xseed[3]);
-extern long lrand48(void);
-extern void srand48(long seed);
+#define erand48(x) pg_erand48(x)
+#define lrand48() pg_lrand48()
+#define srand48(x) pg_srand48(x)
 #endif

See our recent trials with python headers for an example of why this
might cause problems on some platforms.  For that matter, I believe
<stdlib.h> would be within its rights to be defining these names as
macros already.

If HAVE_ERAND48 isn't defined and erand48() is nevertheless defined as
a macro, it's a bug in the autoconf test for erand48(). The whole
point is to do that only when there isn't any erand48(). But we could
dodge the issue at any rate by making erand48(), lrand48(), and
srand48() functions that call pg_erand48(), pg_lrand48(), and
pg_srand48() rather than macros. And I'd rather do that than this,
honestly:

If you want erand48_r, best to provide that API, not kluge up some
other functions.

...because erand48() is a GNU extension with a stupid API. I don't
see much value in supporting that, on both counts. We're going to end
up with the built-in erand48_r() on precisely those systems that use
glibc, and our own everywhere else. For the 25 SLOCs it's going cost
us, I'd rather use the same code everywhere.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#13Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#12)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

If you want erand48_r, best to provide that API, not kluge up some
other functions.

...because erand48() is a GNU extension with a stupid API.

I assume you mean erand48_r, there, because erand48 is pretty standard.

I don't
see much value in supporting that, on both counts. We're going to end
up with the built-in erand48_r() on precisely those systems that use
glibc, and our own everywhere else. For the 25 SLOCs it's going cost
us, I'd rather use the same code everywhere.

Maybe. But if that's the approach we want to use, let's just call it
pg_erand48 in the code, and dispense with the alias macros as well as
all vestiges of configure support.

BTW, as far as the original plan of using random_r is concerned, how
did you manage to not run into this?
http://sourceware.org/bugzilla/show_bug.cgi?id=3662
I just wasted half an hour on that stupidity in an unrelated context...

regards, tom lane

#14Robert Haas
robertmhaas@gmail.com
In reply to: Tom Lane (#13)
Re: pgbench internal contention

On Tue, Aug 2, 2011 at 8:44 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Robert Haas <robertmhaas@gmail.com> writes:

If you want erand48_r, best to provide that API, not kluge up some
other functions.

...because erand48() is a GNU extension with a stupid API.

I assume you mean erand48_r, there, because erand48 is pretty standard.

Yes.

I don't
see much value in supporting that, on both counts.  We're going to end
up with the built-in erand48_r() on precisely those systems that use
glibc, and our own everywhere else.  For the 25 SLOCs it's going cost
us, I'd rather use the same code everywhere.

Maybe.  But if that's the approach we want to use, let's just call it
pg_erand48 in the code, and dispense with the alias macros as well as
all vestiges of configure support.

Works for me. Just to confirm, that means we'd also change GEQO to
use pg_erand48().

BTW, as far as the original plan of using random_r is concerned, how
did you manage to not run into this?
http://sourceware.org/bugzilla/show_bug.cgi?id=3662
I just wasted half an hour on that stupidity in an unrelated context...

Good grief. It's hard to imagine a more user-hostile attitude than
the one taken there. I did run into that precise issue, but managed
to stumble on what is apparently the officially sanctioned method of
working around it - viz, zeroing the state array. However, that bug
report is a pretty compelling argument for the position that expecting
any of the GNU blah_r() functions to behave halfway sanely or be
properly documented is a pipe dream.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#15Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#14)
Re: pgbench internal contention

Robert Haas <robertmhaas@gmail.com> writes:

On Tue, Aug 2, 2011 at 8:44 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Maybe. �But if that's the approach we want to use, let's just call it
pg_erand48 in the code, and dispense with the alias macros as well as
all vestiges of configure support.

Works for me. Just to confirm, that means we'd also change GEQO to
use pg_erand48().

Right, that's what I was thinking.

regards, tom lane

#16Bruce Momjian
bruce@momjian.us
In reply to: Robert Haas (#14)
Re: pgbench internal contention

Robert Haas wrote:

Works for me. Just to confirm, that means we'd also change GEQO to
use pg_erand48().

BTW, as far as the original plan of using random_r is concerned, how
did you manage to not run into this?
http://sourceware.org/bugzilla/show_bug.cgi?id=3662
I just wasted half an hour on that stupidity in an unrelated context...

Good grief. It's hard to imagine a more user-hostile attitude than
the one taken there. I did run into that precise issue, but managed

Yes, it is scary to think people are relying on software written by
people which such a poor attitude toward quality.

--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ It's impossible for everything to be true. +