diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 46be67adaf..79a2a10dee 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -113,18 +113,29 @@ typedef struct socket_set
  */
 
 #ifdef WIN32
+#define PTHREAD_BARRIER_SERIAL_THREAD (-1)
+
 /* Use native win32 threads on Windows */
 typedef struct win32_pthread *pthread_t;
 typedef int pthread_attr_t;
+typedef SYNCHRONIZATION_BARRIER pthread_barrier_t;
 
 static int	pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 static int	pthread_join(pthread_t th, void **thread_return);
+
+static int	pthread_barrier_init(pthread_barrier_t *barrier, void *unused, int nthreads);
+static int	pthread_barrier_wait(pthread_barrier_t *barrier);
+static int	pthread_barrier_destroy(pthread_barrier_t *barrier);
 #elif defined(ENABLE_THREAD_SAFETY)
 /* Use platform-dependent pthread capability */
 #include <pthread.h>
 #else
 /* No threads implementation, use none (-j 1) */
 #define pthread_t void *
+#define pthread_barrier_t void *
+#define pthread_barrier_init(a, b, c) /* ignore */
+#define pthread_barrier_wait(a) /* ignore */
+#define pthread_barrier_destroy(a) /* ignore */
 #endif
 
 
@@ -310,6 +321,9 @@ typedef struct RandomState
 /* Various random sequences are initialized from this one. */
 static RandomState base_random_sequence;
 
+/* Barrier synchronizing all threads at start and after connection setup */
+static pthread_barrier_t barrier;
+
 /*
  * Connection state machine states.
  */
@@ -453,8 +467,8 @@ typedef struct
 
 	/* per thread collected stats in microseconds */
 	pg_time_usec_t	create_time;	/* thread creation time */
-	pg_time_usec_t	started_time;	/* thread is running */
-	pg_time_usec_t	bench_start; 	/* thread is benchmarking */
+	pg_time_usec_t	started_time;	/* thread is running after start barrier */
+	pg_time_usec_t	bench_start; 	/* thread is benchmarking after connection barrier */
 	pg_time_usec_t	conn_duration;	/* cumulated connection and deconnection delays */
 
 	StatsData	stats;
@@ -6114,6 +6128,8 @@ main(int argc, char **argv)
 	if (duration > 0)
 		setalarm(duration);
 
+	pthread_barrier_init(&barrier, NULL, nthreads);
+
 #ifdef ENABLE_THREAD_SAFETY
 	/* start all threads but thread 0 which is executed directly later */
 	for (i = 1; i < nthreads; i++)
@@ -6185,6 +6201,8 @@ main(int argc, char **argv)
 	printResults(&stats, pg_time_now() - bench_start, conn_total_duration,
 				 bench_start - start_time, latency_late);
 
+	pthread_barrier_destroy(&barrier);
+
 	if (exit_code != 0)
 		pg_log_fatal("Run was aborted; the above results are incomplete.");
 
@@ -6233,6 +6251,8 @@ threadRun(void *arg)
 	}
 
 	/* READY */
+	pthread_barrier_wait(&barrier);
+
 	thread_start = pg_time_now();
 	thread->started_time = thread_start;
 	last_report = thread_start;
@@ -6257,6 +6277,8 @@ threadRun(void *arg)
 		thread->conn_duration = 0;
 	}
 
+	/* GO */
+	pthread_barrier_wait(&barrier);
 
 	start = pg_time_now();
 	thread->bench_start = start;
@@ -6762,4 +6784,26 @@ pthread_join(pthread_t th, void **thread_return)
 	return 0;
 }
 
+static int
+pthread_barrier_init(pthread_barrier_t *barrier, void *unused, int nthreads)
+{
+	/* no spinning: threads are not expected to arrive at the barrier together */
+	/* "unused" fills the pthread attr slot; returns 0 on success, -1 on failure */
+	return InitializeSynchronizationBarrier(barrier, nthreads, 0) ? 0 : -1;
+}
+
+static int
+pthread_barrier_wait(pthread_barrier_t *barrier)
+{
+	/* block-only wait; a true result maps to PTHREAD_BARRIER_SERIAL_THREAD, else 0 */
+	return EnterSynchronizationBarrier(barrier, SYNCHRONIZATION_BARRIER_FLAGS_BLOCK_ONLY) ? PTHREAD_BARRIER_SERIAL_THREAD : 0;
+}
+
+static int
+pthread_barrier_destroy(pthread_barrier_t *barrier)
+{
+	/* delete the barrier, discard the API result, and always return 0 */
+	return ((void) DeleteSynchronizationBarrier(barrier), 0);
+}
+
 #endif							/* WIN32 */
