From 07d2102ed7faee6c1ed7cbacf44b3c0d95d3e2d5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 17 Jun 2021 22:36:56 +1200
Subject: [PATCH] Revert "pgbench: Improve time logic."

Discussion: https://postgr.es/m/CAHLJuCW_8Vpcr0%3Dt6O_gozrg3wXXWXZXDioYJd3NhvKriqgpfQ%40mail.gmail.com
---
 doc/src/sgml/ref/pgbench.sgml    |  39 ++-
 src/bin/pgbench/pgbench.c        | 443 ++++++++++++++++---------------
 src/tools/pgindent/typedefs.list |   1 -
 3 files changed, 248 insertions(+), 235 deletions(-)
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 0c60077e1f..eba6409148 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -58,10 +58,8 @@ number of clients: 10
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 10000/10000
-latency average = 11.013 ms
-latency stddev = 7.351 ms
-initial connection time = 45.758 ms
-tps = 896.967014 (without initial connection time)
+tps = 85.184871 (including connections establishing)
+tps = 85.296346 (excluding connections establishing)
 </screen>
 
   The first six lines report some of the most important parameter
@@ -70,7 +68,8 @@ tps = 896.967014 (without initial connection time)
   and number of transactions per client); these will be equal unless the run
   failed before completion.  (In <option>-T</option> mode, only the actual
   number of transactions is printed.)
-  The last line reports the number of transactions per second.
+  The last two lines report the number of transactions per second,
+  figured with and without counting the time to start database sessions.
  </para>
 
   <para>
@@ -2339,22 +2338,22 @@ number of clients: 10
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 10000/10000
-latency average = 10.870 ms
-latency stddev = 7.341 ms
-initial connection time = 30.954 ms
-tps = 907.949122 (without initial connection time)
+latency average = 15.844 ms
+latency stddev = 2.715 ms
+tps = 618.764555 (including connections establishing)
+tps = 622.977698 (excluding connections establishing)
 statement latencies in milliseconds:
-    0.001  \set aid random(1, 100000 * :scale)
-    0.001  \set bid random(1, 1 * :scale)
-    0.001  \set tid random(1, 10 * :scale)
-    0.000  \set delta random(-5000, 5000)
-    0.046  BEGIN;
-    0.151  UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
-    0.107  SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
-    4.241  UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
-    5.245  UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
-    0.102  INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
-    0.974  END;
+        0.002  \set aid random(1, 100000 * :scale)
+        0.005  \set bid random(1, 1 * :scale)
+        0.002  \set tid random(1, 10 * :scale)
+        0.001  \set delta random(-5000, 5000)
+        0.326  BEGIN;
+        0.603  UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+        0.454  SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        5.528  UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+        7.335  UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+        0.371  INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+        1.212  END;
 </screen>
   </para>
 
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d7479925cb..49a7cabf0f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -321,20 +321,13 @@ typedef struct SimpleStats
 	double		sum2;			/* sum of squared values */
 } SimpleStats;
 
-/*
- * The instr_time type is expensive when dealing with time arithmetic.  Define
- * a type to hold microseconds instead.  Type int64 is good enough for about
- * 584500 years.
- */
-typedef int64 pg_time_usec_t;
-
 /*
  * Data structure to hold various statistics: per-thread and per-script stats
  * are maintained and merged together.
  */
 typedef struct StatsData
 {
-	pg_time_usec_t start_time;	/* interval start time, for aggregates */
+	time_t		start_time;		/* interval start time, for aggregates */
 	int64		cnt;			/* number of transactions, including skipped */
 	int64		skipped;		/* number of transactions skipped under --rate
 								 * and --latency-limit */
@@ -463,11 +456,11 @@ typedef struct
 	int			nvariables;		/* number of variables */
 	bool		vars_sorted;	/* are variables sorted by name? */
 
-	/* various times about current transaction in microseconds */
-	pg_time_usec_t txn_scheduled;	/* scheduled start time of transaction */
-	pg_time_usec_t sleep_until; /* scheduled start time of next cmd */
-	pg_time_usec_t txn_begin;	/* used for measuring schedule lag times */
-	pg_time_usec_t stmt_begin;	/* used for measuring statement latencies */
+	/* various times about current transaction */
+	int64		txn_scheduled;	/* scheduled start time of transaction (usec) */
+	int64		sleep_until;	/* scheduled start time of next cmd (usec) */
+	instr_time	txn_begin;		/* used for measuring schedule lag times */
+	instr_time	stmt_begin;		/* used for measuring statement latencies */
 
 	bool		prepared[MAX_SCRIPTS];	/* whether client prepared the script */
 
@@ -497,15 +490,11 @@ typedef struct
 	int64		throttle_trigger;	/* previous/next throttling (us) */
 	FILE	   *logfile;		/* where to log, or NULL */
 
-	/* per thread collected stats in microseconds */
-	pg_time_usec_t create_time; /* thread creation time */
-	pg_time_usec_t started_time;	/* thread is running */
-	pg_time_usec_t bench_start; /* thread is benchmarking */
-	pg_time_usec_t conn_duration;	/* cumulated connection and deconnection
-									 * delays */
-
+	/* per thread collected stats */
+	instr_time	start_time;		/* thread start time */
+	instr_time	conn_time;
 	StatsData	stats;
-	int64		latency_late;	/* count executed but late transactions */
+	int64		latency_late;	/* executed but late transactions */
 } TState;
 
 /*
@@ -647,10 +636,10 @@ static void setIntValue(PgBenchValue *pv, int64 ival);
 static void setDoubleValue(PgBenchValue *pv, double dval);
 static bool evaluateExpr(CState *st, PgBenchExpr *expr,
 						 PgBenchValue *retval);
-static ConnectionStateEnum executeMetaCommand(CState *st, pg_time_usec_t *now);
+static ConnectionStateEnum executeMetaCommand(CState *st, instr_time *now);
 static void doLog(TState *thread, CState *st,
 				  StatsData *agg, bool skipped, double latency, double lag);
-static void processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
+static void processXactStats(TState *thread, CState *st, instr_time *now,
 							 bool skipped, StatsData *agg);
 static void addScript(ParsedScript script);
 static THREAD_FUNC_RETURN_TYPE THREAD_FUNC_CC threadRun(void *arg);
@@ -669,24 +658,6 @@ static const PsqlScanCallbacks pgbench_callbacks = {
 	NULL,						/* don't need get_variable functionality */
 };
 
-static inline pg_time_usec_t
-pg_time_now(void)
-{
-	instr_time	now;
-
-	INSTR_TIME_SET_CURRENT(now);
-
-	return (pg_time_usec_t) INSTR_TIME_GET_MICROSEC(now);
-}
-
-static inline void
-pg_time_now_lazy(pg_time_usec_t *now)
-{
-	if ((*now) == 0)
-		(*now) = pg_time_now();
-}
-
-#define PG_TIME_GET_DOUBLE(t) (0.000001 * (t))
 
 static void
 usage(void)
@@ -1279,9 +1250,9 @@ mergeSimpleStats(SimpleStats *acc, SimpleStats *ss)
  * the given value.
  */
 static void
-initStats(StatsData *sd, pg_time_usec_t start)
+initStats(StatsData *sd, time_t start_time)
 {
-	sd->start_time = start;
+	sd->start_time = start_time;
 	sd->cnt = 0;
 	sd->skipped = 0;
 	initSimpleStats(&sd->latency);
@@ -3118,6 +3089,7 @@ evaluateSleep(CState *st, int argc, char **argv, int *usecs)
 static void
 advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 {
+	instr_time	now;
 
 	/*
 	 * gettimeofday() isn't free, so we get the current timestamp lazily the
@@ -3127,7 +3099,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 	 * means "not set yet".  Reset "now" when we execute shell commands or
 	 * expressions, which might take a non-negligible amount of time, though.
 	 */
-	pg_time_usec_t now = 0;
+	INSTR_TIME_SET_ZERO(now);
 
 	/*
 	 * Loop in the state machine, until we have to wait for a result from the
@@ -3162,30 +3134,29 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 
 				/* Start new transaction (script) */
 			case CSTATE_START_TX:
-				pg_time_now_lazy(&now);
 
 				/* establish connection if needed, i.e. under --connect */
 				if (st->con == NULL)
 				{
-					pg_time_usec_t start = now;
+					instr_time	start;
 
+					INSTR_TIME_SET_CURRENT_LAZY(now);
+					start = now;
 					if ((st->con = doConnect()) == NULL)
 					{
 						pg_log_error("client %d aborted while establishing connection", st->id);
 						st->state = CSTATE_ABORTED;
 						break;
 					}
-
-					/* reset now after connection */
-					now = pg_time_now();
-
-					thread->conn_duration += now - start;
+					INSTR_TIME_SET_CURRENT(now);
+					INSTR_TIME_ACCUM_DIFF(thread->conn_time, now, start);
 
 					/* Reset session-local state */
 					memset(st->prepared, 0, sizeof(st->prepared));
 				}
 
 				/* record transaction start time */
+				INSTR_TIME_SET_CURRENT_LAZY(now);
 				st->txn_begin = now;
 
 				/*
@@ -3193,7 +3164,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * scheduled start time.
 				 */
 				if (!throttle_delay)
-					st->txn_scheduled = now;
+					st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now);
 
 				/* Begin with the first command */
 				st->state = CSTATE_START_COMMAND;
@@ -3229,9 +3200,12 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 */
 				if (latency_limit)
 				{
-					pg_time_now_lazy(&now);
+					int64		now_us;
+
+					INSTR_TIME_SET_CURRENT_LAZY(now);
+					now_us = INSTR_TIME_GET_MICROSEC(now);
 
-					while (thread->throttle_trigger < now - latency_limit &&
+					while (thread->throttle_trigger < now_us - latency_limit &&
 						   (nxacts <= 0 || st->cnt < nxacts))
 					{
 						processXactStats(thread, st, &now, true, agg);
@@ -3264,9 +3238,9 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * Wait until it's time to start next transaction.
 				 */
 			case CSTATE_THROTTLE:
-				pg_time_now_lazy(&now);
+				INSTR_TIME_SET_CURRENT_LAZY(now);
 
-				if (now < st->txn_scheduled)
+				if (INSTR_TIME_GET_MICROSEC(now) < st->txn_scheduled)
 					return;		/* still sleeping, nothing to do here */
 
 				/* done sleeping, but don't start transaction if we're done */
@@ -3289,7 +3263,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				/* record begin time of next command, and initiate it */
 				if (report_per_command)
 				{
-					pg_time_now_lazy(&now);
+					INSTR_TIME_SET_CURRENT_LAZY(now);
 					st->stmt_begin = now;
 				}
 
@@ -3485,8 +3459,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				 * instead of CSTATE_START_TX.
 				 */
 			case CSTATE_SLEEP:
-				pg_time_now_lazy(&now);
-				if (now < st->sleep_until)
+				INSTR_TIME_SET_CURRENT_LAZY(now);
+				if (INSTR_TIME_GET_MICROSEC(now) < st->sleep_until)
 					return;		/* still sleeping, nothing to do here */
 				/* Else done sleeping. */
 				st->state = CSTATE_END_COMMAND;
@@ -3506,12 +3480,13 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				{
 					Command    *command;
 
-					pg_time_now_lazy(&now);
+					INSTR_TIME_SET_CURRENT_LAZY(now);
 
 					command = sql_script[st->use_file].commands[st->command];
 					/* XXX could use a mutex here, but we choose not to */
 					addToSimpleStats(&command->stats,
-									 PG_TIME_GET_DOUBLE(now - st->stmt_begin));
+									 INSTR_TIME_GET_DOUBLE(now) -
+									 INSTR_TIME_GET_DOUBLE(st->stmt_begin));
 				}
 
 				/* Go ahead with next command, to be executed or skipped */
@@ -3537,7 +3512,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 				if (is_connect)
 				{
 					finishCon(st);
-					now = 0;
+					INSTR_TIME_SET_ZERO(now);
 				}
 
 				if ((st->cnt >= nxacts && duration <= 0) || timer_exceeded)
@@ -3575,7 +3550,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
  * take no time to execute.
  */
 static ConnectionStateEnum
-executeMetaCommand(CState *st, pg_time_usec_t *now)
+executeMetaCommand(CState *st, instr_time *now)
 {
 	Command    *command = sql_script[st->use_file].commands[st->command];
 	int			argc;
@@ -3617,8 +3592,8 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 			return CSTATE_ABORTED;
 		}
 
-		pg_time_now_lazy(now);
-		st->sleep_until = (*now) + usec;
+		INSTR_TIME_SET_CURRENT_LAZY(*now);
+		st->sleep_until = INSTR_TIME_GET_MICROSEC(*now) + usec;
 		return CSTATE_SLEEP;
 	}
 	else if (command->meta == META_SET)
@@ -3760,7 +3735,7 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 	 * executing the expression or shell command might have taken a
 	 * non-negligible amount of time, so reset 'now'
 	 */
-	*now = 0;
+	INSTR_TIME_SET_ZERO(*now);
 
 	return CSTATE_END_COMMAND;
 }
@@ -3770,15 +3745,14 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
  *
  * We print Unix-epoch timestamps in the log, so that entries can be
  * correlated against other logs.  On some platforms this could be obtained
- * from the caller, but rather than get entangled with that, we just eat
- * the cost of an extra syscall in all cases.
+ * from the instr_time reading the caller has, but rather than get entangled
+ * with that, we just eat the cost of an extra syscall in all cases.
  */
 static void
 doLog(TState *thread, CState *st,
 	  StatsData *agg, bool skipped, double latency, double lag)
 {
 	FILE	   *logfile = thread->logfile;
-	pg_time_usec_t now = pg_time_now();
 
 	Assert(use_log);
 
@@ -3798,12 +3772,13 @@ doLog(TState *thread, CState *st,
 		 * any empty intervals in between (this may happen with very low tps,
 		 * e.g. --rate=0.1).
 		 */
+		time_t		now = time(NULL);
 
 		while (agg->start_time + agg_interval <= now)
 		{
 			/* print aggregated report to logfile */
-			fprintf(logfile, INT64_FORMAT " " INT64_FORMAT " %.0f %.0f %.0f %.0f",
-					agg->start_time,
+			fprintf(logfile, "%ld " INT64_FORMAT " %.0f %.0f %.0f %.0f",
+					(long) agg->start_time,
 					agg->cnt,
 					agg->latency.sum,
 					agg->latency.sum2,
@@ -3831,15 +3806,17 @@ doLog(TState *thread, CState *st,
 	else
 	{
 		/* no, print raw transactions */
+		struct timeval tv;
+
+		gettimeofday(&tv, NULL);
 		if (skipped)
-			fprintf(logfile, "%d " INT64_FORMAT " skipped %d " INT64_FORMAT " "
-					INT64_FORMAT,
-					st->id, st->cnt, st->use_file, now / 1000000, now % 1000000);
+			fprintf(logfile, "%d " INT64_FORMAT " skipped %d %ld %ld",
+					st->id, st->cnt, st->use_file,
+					(long) tv.tv_sec, (long) tv.tv_usec);
 		else
-			fprintf(logfile, "%d " INT64_FORMAT " %.0f %d " INT64_FORMAT " "
-					INT64_FORMAT,
+			fprintf(logfile, "%d " INT64_FORMAT " %.0f %d %ld %ld",
 					st->id, st->cnt, latency, st->use_file,
-					now / 1000000, now % 1000000);
+					(long) tv.tv_sec, (long) tv.tv_usec);
 		if (throttle_delay)
 			fprintf(logfile, " %.0f", lag);
 		fputc('\n', logfile);
@@ -3853,7 +3830,7 @@ doLog(TState *thread, CState *st,
  * Note that even skipped transactions are counted in the "cnt" fields.)
  */
 static void
-processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
+processXactStats(TState *thread, CState *st, instr_time *now,
 				 bool skipped, StatsData *agg)
 {
 	double		latency = 0.0,
@@ -3863,11 +3840,11 @@ processXactStats(TState *thread, CState *st, pg_time_usec_t *now,
 
 	if (detailed && !skipped)
 	{
-		pg_time_now_lazy(now);
+		INSTR_TIME_SET_CURRENT_LAZY(*now);
 
 		/* compute latency & lag */
-		latency = (*now) - st->txn_scheduled;
-		lag = st->txn_begin - st->txn_scheduled;
+		latency = INSTR_TIME_GET_MICROSEC(*now) - st->txn_scheduled;
+		lag = INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled;
 	}
 
 	if (thread_details)
@@ -4118,7 +4095,10 @@ initGenerateDataClientSide(PGconn *con)
 	int64		k;
 
 	/* used to track elapsed time and estimate of the remaining time */
-	pg_time_usec_t start;
+	instr_time	start,
+				diff;
+	double		elapsed_sec,
+				remaining_sec;
 	int			log_interval = 1;
 
 	/* Stay on the same line if reporting to a terminal */
@@ -4170,7 +4150,7 @@ initGenerateDataClientSide(PGconn *con)
 	}
 	PQclear(res);
 
-	start = pg_time_now();
+	INSTR_TIME_SET_CURRENT(start);
 
 	for (k = 0; k < (int64) naccounts * scale; k++)
 	{
@@ -4195,8 +4175,11 @@ initGenerateDataClientSide(PGconn *con)
 		 */
 		if ((!use_quiet) && (j % 100000 == 0))
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-			double		remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+			remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
 
 			fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
 					j, (int64) naccounts * scale,
@@ -4206,8 +4189,11 @@ initGenerateDataClientSide(PGconn *con)
 		/* let's not call the timing for each row, but only each 100 rows */
 		else if (use_quiet && (j % 100 == 0))
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-			double		remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+			remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
 
 			/* have we reached the next interval (or end)? */
 			if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
@@ -4412,8 +4398,10 @@ runInitSteps(const char *initialize_steps)
 
 	for (step = initialize_steps; *step != '\0'; step++)
 	{
+		instr_time	start;
 		char	   *op = NULL;
-		pg_time_usec_t start = pg_time_now();
+
+		INSTR_TIME_SET_CURRENT(start);
 
 		switch (*step)
 		{
@@ -4455,7 +4443,12 @@ runInitSteps(const char *initialize_steps)
 
 		if (op != NULL)
 		{
-			double		elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
+			instr_time	diff;
+			double		elapsed_sec;
+
+			INSTR_TIME_SET_CURRENT(diff);
+			INSTR_TIME_SUBTRACT(diff, start);
+			elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
 
 			if (!first)
 				appendPQExpBufferStr(&stats, ", ");
@@ -5403,12 +5396,12 @@ addScript(ParsedScript script)
  * progress report.  On exit, they are updated with the new stats.
  */
 static void
-printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
+printProgressReport(TState *threads, int64 test_start, int64 now,
 					StatsData *last, int64 *last_report)
 {
 	/* generate and show report */
-	pg_time_usec_t run = now - *last_report;
-	int64		ntx;
+	int64		run = now - *last_report,
+				ntx;
 	double		tps,
 				total_run,
 				latency,
@@ -5455,7 +5448,16 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 
 	if (progress_timestamp)
 	{
-		snprintf(tbuf, sizeof(tbuf), "%.3f s", PG_TIME_GET_DOUBLE(now));
+		/*
+		 * On some platforms the current system timestamp is available in
+		 * now_time, but rather than get entangled with that, we just eat the
+		 * cost of an extra syscall in all cases.
+		 */
+		struct timeval tv;
+
+		gettimeofday(&tv, NULL);
+		snprintf(tbuf, sizeof(tbuf), "%ld.%03ld s",
+				 (long) tv.tv_sec, (long) (tv.tv_usec / 1000));
 	}
 	else
 	{
@@ -5495,18 +5497,21 @@ printSimpleStats(const char *prefix, SimpleStats *ss)
 
 /* print out results */
 static void
-printResults(StatsData *total,
-			 pg_time_usec_t total_duration, /* benchmarking time */
-			 pg_time_usec_t conn_total_duration,	/* is_connect */
-			 pg_time_usec_t conn_elapsed_duration,	/* !is_connect */
-			 int64 latency_late)
+printResults(StatsData *total, instr_time total_time,
+			 instr_time conn_total_time, int64 latency_late)
 {
-	/* tps is about actually executed transactions during benchmarking */
+	double		time_include,
+				tps_include,
+				tps_exclude;
 	int64		ntx = total->cnt - total->skipped;
-	double		bench_duration = PG_TIME_GET_DOUBLE(total_duration);
-	double		tps = ntx / bench_duration;
 
-	printf("pgbench (PostgreSQL) %d.%d\n", PG_VERSION_NUM / 10000, PG_VERSION_NUM % 100);
+	time_include = INSTR_TIME_GET_DOUBLE(total_time);
+
+	/* tps is about actually executed transactions */
+	tps_include = ntx / time_include;
+	tps_exclude = ntx /
+		(time_include - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients));
+
 	/* Report test parameters. */
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
@@ -5537,7 +5542,8 @@ printResults(StatsData *total,
 
 	if (throttle_delay && latency_limit)
 		printf("number of transactions skipped: " INT64_FORMAT " (%.3f %%)\n",
-			   total->skipped, 100.0 * total->skipped / total->cnt);
+			   total->skipped,
+			   100.0 * total->skipped / total->cnt);
 
 	if (latency_limit)
 		printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT "/" INT64_FORMAT " (%.3f %%)\n",
@@ -5550,7 +5556,7 @@ printResults(StatsData *total,
 	{
 		/* no measurement, show average latency computed from run time */
 		printf("latency average = %.3f ms\n",
-			   0.001 * total_duration * nclients / total->cnt);
+			   1000.0 * time_include * nclients / total->cnt);
 	}
 
 	if (throttle_delay)
@@ -5565,25 +5571,8 @@ printResults(StatsData *total,
 			   0.001 * total->lag.sum / total->cnt, 0.001 * total->lag.max);
 	}
 
-	/*
-	 * Under -C/--connect, each transaction incurs a significant connection
-	 * cost, it would not make much sense to ignore it in tps, and it would
-	 * not be tps anyway.
-	 *
-	 * Otherwise connections are made just once at the beginning of the run
-	 * and should not impact performance but for very short run, so they are
-	 * (right)fully ignored in tps.
-	 */
-	if (is_connect)
-	{
-		printf("average connection time = %.3f ms\n", 0.001 * conn_total_duration / total->cnt);
-		printf("tps = %f (including reconnection times)\n", tps);
-	}
-	else
-	{
-		printf("initial connection time = %.3f ms\n", 0.001 * conn_elapsed_duration);
-		printf("tps = %f (without initial connection time)\n", tps);
-	}
+	printf("tps = %f (including connections establishing)\n", tps_include);
+	printf("tps = %f (excluding connections establishing)\n", tps_exclude);
 
 	/* Report per-script/command statistics */
 	if (per_script_stats || report_per_command)
@@ -5604,7 +5593,7 @@ printResults(StatsData *total,
 					   100.0 * sql_script[i].weight / total_weight,
 					   sstats->cnt,
 					   100.0 * sstats->cnt / total->cnt,
-					   (sstats->cnt - sstats->skipped) / bench_duration);
+					   (sstats->cnt - sstats->skipped) / time_include);
 
 				if (throttle_delay && latency_limit && sstats->cnt > 0)
 					printf(" - number of transactions skipped: " INT64_FORMAT " (%.3f%%)\n",
@@ -5652,7 +5641,10 @@ set_random_seed(const char *seed)
 	if (seed == NULL || strcmp(seed, "time") == 0)
 	{
 		/* rely on current time */
-		iseed = pg_time_now();
+		instr_time	now;
+
+		INSTR_TIME_SET_CURRENT(now);
+		iseed = (uint64) INSTR_TIME_GET_MICROSEC(now);
 	}
 	else if (strcmp(seed, "rand") == 0)
 	{
@@ -5755,11 +5747,9 @@ main(int argc, char **argv)
 	CState	   *state;			/* status of clients */
 	TState	   *threads;		/* array of thread */
 
-	pg_time_usec_t
-				start_time,		/* start up time */
-				bench_start = 0,	/* first recorded benchmarking time */
-				conn_total_duration;	/* cumulated connection time in
-										 * threads */
+	instr_time	start_time;		/* start up time */
+	instr_time	total_time;
+	instr_time	conn_total_time;
 	int64		latency_late = 0;
 	StatsData	stats;
 	int			weight;
@@ -6428,55 +6418,67 @@ main(int argc, char **argv)
 	/* all clients must be assigned to a thread */
 	Assert(nclients_dealt == nclients);
 
-	/* get start up time for the whole computation */
-	start_time = pg_time_now();
+	/* get start up time */
+	INSTR_TIME_SET_CURRENT(start_time);
 
 	/* set alarm if duration is specified. */
 	if (duration > 0)
 		setalarm(duration);
 
+	/* set up the barrier that we'll use to synchronize threads */
 	errno = THREAD_BARRIER_INIT(&barrier, nthreads);
 	if (errno != 0)
 		pg_log_fatal("could not initialize barrier: %m");
 
+	/* start threads */
 #ifdef ENABLE_THREAD_SAFETY
-	/* start all threads but thread 0 which is executed directly later */
-	for (i = 1; i < nthreads; i++)
+	for (i = 0; i < nthreads; i++)
 	{
 		TState	   *thread = &threads[i];
 
-		thread->create_time = pg_time_now();
-		errno = THREAD_CREATE(&thread->thread, threadRun, thread);
+		INSTR_TIME_SET_CURRENT(thread->start_time);
+
+		/* compute when to stop */
+		if (duration > 0)
+			end_time = INSTR_TIME_GET_MICROSEC(thread->start_time) +
+				(int64) 1000000 * duration;
 
-		if (errno != 0)
+		/* the first thread (i = 0) is executed by main thread */
+		if (i > 0)
 		{
-			pg_log_fatal("could not create thread: %m");
-			exit(1);
+			errno = THREAD_CREATE(&thread->thread, threadRun, thread);
+
+			if (errno != 0)
+			{
+				pg_log_fatal("could not create thread: %m");
+				exit(1);
+			}
 		}
 	}
 #else
-	Assert(nthreads == 1);
-#endif							/* ENABLE_THREAD_SAFETY */
-
+	INSTR_TIME_SET_CURRENT(threads[0].start_time);
 	/* compute when to stop */
-	threads[0].create_time = pg_time_now();
 	if (duration > 0)
-		end_time = threads[0].create_time + (int64) 1000000 * duration;
-
-	/* run thread 0 directly */
-	(void) threadRun(&threads[0]);
+		end_time = INSTR_TIME_GET_MICROSEC(threads[0].start_time) +
+			(int64) 1000000 * duration;
+#endif							/* ENABLE_THREAD_SAFETY */
 
-	/* wait for other threads and accumulate results */
+	/* wait for threads and accumulate results */
 	initStats(&stats, 0);
-	conn_total_duration = 0;
-
+	INSTR_TIME_SET_ZERO(conn_total_time);
 	for (i = 0; i < nthreads; i++)
 	{
 		TState	   *thread = &threads[i];
 
 #ifdef ENABLE_THREAD_SAFETY
-		if (i > 0)
+		if (i == 0)
+			/* actually run this thread directly in the main thread */
+			(void) threadRun(thread);
+		else
+			/* wait of other threads. should check that 0 is returned? */
 			THREAD_JOIN(thread->thread);
+#else
+		(void) threadRun(thread);
 #endif							/* ENABLE_THREAD_SAFETY */
 
 		for (int j = 0; j < thread->nstate; j++)
@@ -6489,25 +6491,23 @@ main(int argc, char **argv)
 		stats.cnt += thread->stats.cnt;
 		stats.skipped += thread->stats.skipped;
 		latency_late += thread->latency_late;
-		conn_total_duration += thread->conn_duration;
-
-		/* first recorded benchmarking start time */
-		if (bench_start == 0 || thread->bench_start < bench_start)
-			bench_start = thread->bench_start;
+		INSTR_TIME_ADD(conn_total_time, thread->conn_time);
 	}
-
-	/* XXX should this be connection time? */
 	disconnect_all(state, nclients);
 
 	/*
-	 * Beware that performance of short benchmarks with many threads and
-	 * possibly long transactions can be deceptive because threads do not
-	 * start and finish at the exact same time. The total duration computed
-	 * here encompasses all transactions so that tps shown is somehow slightly
-	 * underestimated.
+	 * XXX We compute results as though every client of every thread started
+	 * and finished at the same time.  That model can diverge noticeably from
+	 * reality for a short benchmark run involving relatively many threads.
+	 * The first thread may process notably many transactions before the last
+	 * thread begins.  Improving the model alone would bring limited benefit,
+	 * because performance during those periods of partial thread count can
+	 * easily exceed steady state performance.  This is one of the many ways
+	 * short runs convey deceptive performance figures.
 	 */
-	printResults(&stats, pg_time_now() - bench_start, conn_total_duration,
-				 bench_start - start_time, latency_late);
+	INSTR_TIME_SET_CURRENT(total_time);
+	INSTR_TIME_SUBTRACT(total_time, start_time);
+	printResults(&stats, total_time, conn_total_time, latency_late);
 
 	THREAD_BARRIER_DESTROY(&barrier);
 
@@ -6522,16 +6522,34 @@ threadRun(void *arg)
 {
 	TState	   *thread = (TState *) arg;
 	CState	   *state = thread->state;
-	pg_time_usec_t start;
+	instr_time	start,
+				end;
 	int			nstate = thread->nstate;
 	int			remains = nstate;	/* number of remaining clients */
 	socket_set *sockets = alloc_socket_set(nstate);
-	int64		thread_start,
-				last_report,
-				next_report;
+	int			i;
+
+	/* for reporting progress: */
+	int64		thread_start = INSTR_TIME_GET_MICROSEC(thread->start_time);
+	int64		last_report = thread_start;
+	int64		next_report = last_report + (int64) progress * 1000000;
 	StatsData	last,
 				aggs;
 
+	/*
+	 * Initialize throttling rate target for all of the thread's clients.  It
+	 * might be a little more accurate to reset thread->start_time here too.
+	 * The possible drift seems too small relative to typical throttle delay
+	 * times to worry about it.
+	 */
+	INSTR_TIME_SET_CURRENT(start);
+	thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
+
+	INSTR_TIME_SET_ZERO(thread->conn_time);
+
+	initStats(&aggs, time(NULL));
+	last = aggs;
+
 	/* open log file if requested */
 	if (use_log)
 	{
@@ -6552,23 +6570,10 @@ threadRun(void *arg)
 		}
 	}
 
-	/* explicitly initialize the state machines */
-	for (int i = 0; i < nstate; i++)
-		state[i].state = CSTATE_CHOOSE_SCRIPT;
-
-	/* READY */
-	THREAD_BARRIER_WAIT(&barrier);
-
-	thread_start = pg_time_now();
-	thread->started_time = thread_start;
-	last_report = thread_start;
-	next_report = last_report + (int64) 1000000 * progress;
-
-	/* STEADY */
 	if (!is_connect)
 	{
 		/* make connections to the database before starting */
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			if ((state[i].con = doConnect()) == NULL)
 			{
@@ -6584,32 +6589,26 @@ threadRun(void *arg)
 				goto done;
 			}
 		}
-
-		/* compute connection delay */
-		thread->conn_duration = pg_time_now() - thread->started_time;
-	}
-	else
-	{
-		/* no connection delay to record */
-		thread->conn_duration = 0;
 	}
 
-	/* GO */
-	THREAD_BARRIER_WAIT(&barrier);
+	/* time after thread and connections set up */
+	INSTR_TIME_SET_CURRENT(thread->conn_time);
+	INSTR_TIME_SUBTRACT(thread->conn_time, thread->start_time);
 
-	start = pg_time_now();
-	thread->bench_start = start;
-	thread->throttle_trigger = start;
+	THREAD_BARRIER_WAIT(&barrier);
 
-	initStats(&aggs, start);
-	last = aggs;
+	/* explicitly initialize the state machines */
+	for (i = 0; i < nstate; i++)
+	{
+		state[i].state = CSTATE_CHOOSE_SCRIPT;
+	}
 
 	/* loop till all clients have terminated */
 	while (remains > 0)
 	{
 		int			nsocks;		/* number of sockets to be waited for */
-		pg_time_usec_t min_usec;
-		pg_time_usec_t now = 0; /* set this only if needed */
+		int64		min_usec;
+		int64		now_usec = 0;	/* set this only if needed */
 
 		/*
 		 * identify which client sockets should be checked for input, and
@@ -6618,21 +6617,27 @@ threadRun(void *arg)
 		clear_socket_set(sockets);
 		nsocks = 0;
 		min_usec = PG_INT64_MAX;
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			CState	   *st = &state[i];
 
 			if (st->state == CSTATE_SLEEP || st->state == CSTATE_THROTTLE)
 			{
 				/* a nap from the script, or under throttling */
-				pg_time_usec_t this_usec;
+				int64		this_usec;
 
 				/* get current time if needed */
-				pg_time_now_lazy(&now);
+				if (now_usec == 0)
+				{
+					instr_time	now;
+
+					INSTR_TIME_SET_CURRENT(now);
+					now_usec = INSTR_TIME_GET_MICROSEC(now);
+				}
 
 				/* min_usec should be the minimum delay across all clients */
 				this_usec = (st->state == CSTATE_SLEEP ?
-							 st->sleep_until : st->txn_scheduled) - now;
+							 st->sleep_until : st->txn_scheduled) - now_usec;
 				if (min_usec > this_usec)
 					min_usec = this_usec;
 			}
@@ -6667,12 +6672,19 @@ threadRun(void *arg)
 		/* also wake up to print the next progress report on time */
 		if (progress && min_usec > 0 && thread->tid == 0)
 		{
-			pg_time_now_lazy(&now);
+			/* get current time if needed */
+			if (now_usec == 0)
+			{
+				instr_time	now;
 
-			if (now >= next_report)
+				INSTR_TIME_SET_CURRENT(now);
+				now_usec = INSTR_TIME_GET_MICROSEC(now);
+			}
+
+			if (now_usec >= next_report)
 				min_usec = 0;
-			else if ((next_report - now) < min_usec)
-				min_usec = next_report - now;
+			else if ((next_report - now_usec) < min_usec)
+				min_usec = next_report - now_usec;
 		}
 
 		/*
@@ -6721,7 +6733,7 @@ threadRun(void *arg)
 
 		/* ok, advance the state machine of each connection */
 		nsocks = 0;
-		for (int i = 0; i < nstate; i++)
+		for (i = 0; i < nstate; i++)
 		{
 			CState	   *st = &state[i];
 
@@ -6759,8 +6771,11 @@ threadRun(void *arg)
 		/* progress report is made by thread 0 for all threads */
 		if (progress && thread->tid == 0)
 		{
-			pg_time_usec_t now = pg_time_now();
+			instr_time	now_time;
+			int64		now;
 
+			INSTR_TIME_SET_CURRENT(now_time);
+			now = INSTR_TIME_GET_MICROSEC(now_time);
 			if (now >= next_report)
 			{
 				/*
@@ -6778,17 +6793,17 @@ threadRun(void *arg)
 				 */
 				do
 				{
-					next_report += (int64) 1000000 * progress;
+					next_report += (int64) progress * 1000000;
 				} while (now >= next_report);
 			}
 		}
 	}
 
 done:
-	start = pg_time_now();
+	INSTR_TIME_SET_CURRENT(start);
 	disconnect_all(state, nstate);
-	thread->conn_duration += pg_time_now() - start;
-
+	INSTR_TIME_SET_CURRENT(end);
+	INSTR_TIME_ACCUM_DIFF(thread->conn_time, end, start);
 	if (thread->logfile)
 	{
 		if (agg_interval > 0)
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index abdb08319c..ce7963a674 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3347,7 +3347,6 @@ pg_sha512_ctx
 pg_snapshot
 pg_stack_base_t
 pg_time_t
-pg_time_usec_t
 pg_tz
 pg_tz_cache
 pg_tzenum
-- 
2.30.2