diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index c29d6f8762..20b2785f50 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -19,9 +19,12 @@ static void handle_args(int argc, char *argv[]);
 static uint64 test_timing(unsigned int duration);
 static void output(uint64 loop_count);
 
-/* record duration in powers of 2 microseconds */
+/* record duration in powers of 2 nanoseconds */
 long long int histogram[32];
 
+/* exact counts for durations below 128 ns, one slot per nanosecond */
+long long int direct_histogram[128];
+
 int
 main(int argc, char *argv[])
 {
@@ -130,10 +133,10 @@ test_timing(unsigned int duration)
 				end_time,
 				temp;
 
-	total_time = duration > 0 ? duration * INT64CONST(1000000) : 0;
+	total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
 
 	INSTR_TIME_SET_CURRENT(start_time);
-	cur = INSTR_TIME_GET_MICROSEC(start_time);
+	cur = INSTR_TIME_GET_NANOSEC(start_time);
 
 	while (time_elapsed < total_time)
 	{
@@ -142,7 +145,7 @@ test_timing(unsigned int duration)
 
 		prev = cur;
 		INSTR_TIME_SET_CURRENT(temp);
-		cur = INSTR_TIME_GET_MICROSEC(temp);
+		cur = INSTR_TIME_GET_NANOSEC(temp);
 		diff = cur - prev;
 
 		/* Did time go backwards? */
@@ -153,19 +156,25 @@ test_timing(unsigned int duration)
 			exit(1);
 		}
 
+		if (likely(diff < 128))
+			direct_histogram[diff]++;
+#ifdef HAVE__BUILTIN_CLZ
+		/* __builtin_clz(0) is undefined, so a zero diff maps to bucket 0 */
+		bits = (diff != 0) ? 32 - __builtin_clz((unsigned int) diff) : 0;
+#else
 		/* What is the highest bit in the time diff? */
 		while (diff)
 		{
 			diff >>= 1;
 			bits++;
 		}
-
+#endif
 		/* Update appropriate duration bucket */
 		histogram[bits]++;
 
 		loop_count++;
 		INSTR_TIME_SUBTRACT(temp, start_time);
-		time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
+		time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
 	}
 
 	INSTR_TIME_SET_CURRENT(end_time);
@@ -183,26 +192,57 @@ output(uint64 loop_count)
 {
 	int64		max_bit = 31,
 				i;
-	char	   *header1 = _("< us");
+	char	   *header1 = _("<= ns");
 	char	   *header2 = /* xgettext:no-c-format */ _("% of total");
+	char	   *header2a = /* xgettext:no-c-format */ _("running %");
 	char	   *header3 = _("count");
 	int			len1 = strlen(header1);
 	int			len2 = strlen(header2);
+	int			len2a = strlen(header2a);
 	int			len3 = strlen(header3);
+	double		rprct;			/* running (cumulative) percentage */
 
 	/* find highest bit value */
 	while (max_bit > 0 && histogram[max_bit] == 0)
 		max_bit--;
 
 	printf(_("Histogram of timing durations:\n"));
-	printf("%*s   %*s %*s\n",
-		   Max(6, len1), header1,
+	printf("%*s   %*s %*s %*s\n",
+		   Max(8, len1), header1,
 		   Max(10, len2), header2,
+		   Max(10, len2a), header2a,
 		   Max(10, len3), header3);
 
+	/*
+	 * Bucket i holds the durations that need exactly i bits, so its
+	 * inclusive upper bound is 2^i - 1 ns.
+	 */
+	rprct = 0;
 	for (i = 0; i <= max_bit; i++)
-		printf("%*ld    %*.5f %*lld\n",
-			   Max(6, len1), 1l << i,
-			   Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
-			   Max(10, len3), histogram[i]);
+	{
+		double		prct = (double) histogram[i] * 100 / loop_count;
+
+		rprct += prct;
+		printf("%*lld    %*.4f  %*.4f %*lld\n",
+			   Max(8, len1), (long long) ((1ULL << i) - 1),
+			   Max(10, len2) - 1, prct,
+			   Max(10, len2a) - 1, rprct,
+			   Max(10, len3), histogram[i]);
+	}
+
+	/* Exact per-nanosecond counts; slots that were never hit are skipped */
+	printf(_("First 128 nanoseconds:\n"));
+	rprct = 0;
+	for (i = 0; i < 128; i++)
+	{
+		double		prct = (double) direct_histogram[i] * 100 / loop_count;
+
+		rprct += prct;
+		if (direct_histogram[i])
+			printf("%*lld    %*.4f  %*.4f %*lld\n",
+				   Max(8, len1), (long long) i,
+				   Max(10, len2) - 1, prct,
+				   Max(10, len2a) - 1, rprct,
+				   Max(10, len3), direct_histogram[i]);
+	}
 }
