From 29a12302bab6b2fb8c2475834510b90c4a6197ce Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 11 Jun 2020 19:38:18 -0700
Subject: [PATCH v1 2/2] WIP: Use cpu reference cycles, via rdtsc, to measure
 time for instrumentation.

---
 src/include/portability/instr_time.h | 68 ++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 8 deletions(-)

diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index fc058d548a8..8b2f9a2e707 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -83,7 +83,9 @@
 #define PG_INSTR_CLOCK	CLOCK_REALTIME
 #endif
 
+/* time in baseline cpu cycles */
 typedef int64 instr_time;
+
 #define NS_PER_S INT64CONST(1000000000)
 #define US_PER_S INT64CONST(1000000)
 #define MS_PER_S INT64CONST(1000)
@@ -95,17 +97,67 @@ typedef int64 instr_time;
 
 #define INSTR_TIME_SET_ZERO(t)	((t) = 0)
 
-static inline instr_time pg_clock_gettime_ns(void)
+#include <x86intrin.h>
+#include <cpuid.h>
+
+/*
+ * Return the factor a number of cycles needs to be multiplied by to end up
+ * with seconds.
+ *
+ * FIXME: The cold portion should probably be out-of-line. And it'd be better
+ * to not recompute this in every file that uses this. Best would probably be
+ * to require explicit initialization of cycles_to_sec, because having a
+ * branch really is unnecessary.
+ *
+ * FIXME: We should probably not unnecessarily use floating point math
+ * here. And it's likely that the numbers are small enough that we are running
+ * into floating point inaccuracies already. Probably worthwhile to be a good
+ * bit smarter.
+ *
+ * FIXME: This would need to be conditional, with a fallback to something not
+ * rdtsc based.
+ */
+static inline double __attribute__((const))
+get_cycles_to_sec(void)
 {
-	struct timespec tmp;
+	static double cycles_to_sec = 0;
 
-	clock_gettime(PG_INSTR_CLOCK, &tmp);
+	/*
+	 * Compute baseline cpu performance, determines speed at which rdtsc advances
+	 */
+	if (unlikely(cycles_to_sec == 0))
+	{
+		uint32 cpuinfo[4] = {0};
 
-	return tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
+		__get_cpuid(0x16, cpuinfo, cpuinfo + 1, cpuinfo + 2, cpuinfo + 3);
+		cycles_to_sec = 1 / ((double) cpuinfo[0] * 1000 * 1000);
+	}
+
+	return cycles_to_sec;
+}
+
+static inline instr_time pg_clock_gettime_ref_cycles(void)
+{
+	/*
+	 * The rdtscp waits for all in-flight instructions to finish (but allows
+	 * later instructions to start concurrently). That's good for some timing
+	 * situations (when the time is supposed to cover all the work), but
+	 * terrible for others (when sub-parts of work are measured, because then
+	 * the pipeline stall due to the wait changes the overall timing).
+	 */
+#if 0
+	unsigned int aux;
+	int64 tsc = __rdtscp(&aux);
+
+	return tsc;
+#else
+
+	return __rdtsc();
+#endif
 }
 
 #define INSTR_TIME_SET_CURRENT(t) \
-	(t) = pg_clock_gettime_ns()
+	(t) = pg_clock_gettime_ref_cycles()
 
 #define INSTR_TIME_ADD(x,y) \
 	do { \
@@ -123,13 +175,13 @@ static inline instr_time pg_clock_gettime_ns(void)
 	} while (0)
 
 #define INSTR_TIME_GET_DOUBLE(t) \
-	((double) (t) / NS_PER_S)
+	((double) (t) * get_cycles_to_sec())
 
 #define INSTR_TIME_GET_MILLISEC(t) \
-	((double) (t) / NS_PER_MS)
+	((double) (t) * (get_cycles_to_sec() * MS_PER_S))
 
 #define INSTR_TIME_GET_MICROSEC(t) \
-	((double) (t) / NS_PER_US)
+	((double) (t) * (get_cycles_to_sec() * US_PER_S))
 
 #else							/* !HAVE_CLOCK_GETTIME */
 
-- 
2.25.0.114.g5b0ca878e0

