Use static inline functions for Float <-> Datum conversions

Started by Heikki Linnakangasover 9 years ago4 messages
#1Heikki Linnakangas
hlinnaka@iki.fi
1 attachment(s)

Hi,

Now that we are OK with static inline functions, we can save some cycles
from floating-point functions, by turning Float4GetDatum,
Float8GetDatum, and DatumGetFloat8 into static inlines. They are only a
few instructions, but couldn't be implemented as macros before, because
they need a local union-variable for the conversion.

That can add up to significant speedups with float-heavy queries. For
example:

create table floats as select g::float8 as a, g::float8 as b, g::float8
as c from generate_series(1, 1000000) g;

select sum(a+b+c+1) from floats;

The sum query is about 4% faster on my laptop with this patch.

- Heikki

Attachments:

0001-Use-static-inline-functions-for-float-Datum-conversi.patchapplication/x-patch; name=0001-Use-static-inline-functions-for-float-Datum-conversi.patchDownload
From 8ed0f619093b16d113a0f23ee903268799eb2212 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 31 Aug 2016 13:15:06 +0300
Subject: [PATCH] Use static inline functions for float <-> Datum conversions.

Now that we are OK with using static inline functions, we can use them
to avoid function call overhead for pass-by-val versions of Float4GetDatum,
DatumGetFloat8, and Float8GetDatum. Those functions are only a few CPU
instructions long, but they could not be written into macros previously,
because we needed a local union variable for the conversion.

I kept the pass-by-ref versions as regular functions. Those are very simple
too, but they call palloc() anyway, so shaving a few instructions from the
function call doesn't seem so important there.

diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 7e6a60d..7aae350 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -2126,10 +2126,7 @@ fmgr(Oid procedureId,...)
  *
  * int8, float4, and float8 can be passed by value if Datum is wide enough.
  * (For backwards-compatibility reasons, we allow pass-by-ref to be chosen
- * at compile time even if pass-by-val is possible.)  For the float types,
- * we need a support routine even if we are passing by value, because many
- * machines pass int and float function parameters/results differently;
- * so we need to play weird games with unions.
+ * at compile time even if pass-by-val is possible.)
  *
  * Note: there is only one switch controlling the pass-by-value option for
  * both int8 and float8; this is to avoid making things unduly complicated
@@ -2149,77 +2146,29 @@ Int64GetDatum(int64 X)
 }
 #endif   /* USE_FLOAT8_BYVAL */
 
+#ifndef USE_FLOAT4_BYVAL
+
 Datum
 Float4GetDatum(float4 X)
 {
-#ifdef USE_FLOAT4_BYVAL
-	union
-	{
-		float4		value;
-		int32		retval;
-	}			myunion;
-
-	myunion.value = X;
-	return SET_4_BYTES(myunion.retval);
-#else
 	float4	   *retval = (float4 *) palloc(sizeof(float4));
 
 	*retval = X;
 	return PointerGetDatum(retval);
-#endif
 }
+#endif
 
-#ifdef USE_FLOAT4_BYVAL
-
-float4
-DatumGetFloat4(Datum X)
-{
-	union
-	{
-		int32		value;
-		float4		retval;
-	}			myunion;
-
-	myunion.value = GET_4_BYTES(X);
-	return myunion.retval;
-}
-#endif   /* USE_FLOAT4_BYVAL */
+#ifndef USE_FLOAT8_BYVAL
 
 Datum
 Float8GetDatum(float8 X)
 {
-#ifdef USE_FLOAT8_BYVAL
-	union
-	{
-		float8		value;
-		int64		retval;
-	}			myunion;
-
-	myunion.value = X;
-	return SET_8_BYTES(myunion.retval);
-#else
 	float8	   *retval = (float8 *) palloc(sizeof(float8));
 
 	*retval = X;
 	return PointerGetDatum(retval);
-#endif
 }
-
-#ifdef USE_FLOAT8_BYVAL
-
-float8
-DatumGetFloat8(Datum X)
-{
-	union
-	{
-		int64		value;
-		float8		retval;
-	}			myunion;
-
-	myunion.value = GET_8_BYTES(X);
-	return myunion.retval;
-}
-#endif   /* USE_FLOAT8_BYVAL */
+#endif
 
 
 /*-------------------------------------------------------------------------
diff --git a/src/include/postgres.h b/src/include/postgres.h
index fb1933f..d999013 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -657,6 +657,14 @@ extern Datum Int64GetDatum(int64 X);
 #endif
 
 /*
+ * Float <-> Datum conversions
+ *
+ * These have to be implemented as inline functions rather than macros, when
+ * passing by value, because many machines pass int and float function
+ * parameters/results differently; so we need to play weird games with unions.
+ */
+
+/*
  * DatumGetFloat4
  *		Returns 4-byte floating point value of a datum.
  *
@@ -664,7 +672,18 @@ extern Datum Int64GetDatum(int64 X);
  */
 
 #ifdef USE_FLOAT4_BYVAL
-extern float4 DatumGetFloat4(Datum X);
+static inline float4
+DatumGetFloat4(Datum X)
+{
+	union
+	{
+		int32		value;
+		float4		retval;
+	}			myunion;
+
+	myunion.value = GET_4_BYTES(X);
+	return myunion.retval;
+}
 #else
 #define DatumGetFloat4(X) (* ((float4 *) DatumGetPointer(X)))
 #endif
@@ -676,8 +695,22 @@ extern float4 DatumGetFloat4(Datum X);
  * Note: if float4 is pass by reference, this function returns a reference
  * to palloc'd space.
  */
+#ifdef USE_FLOAT4_BYVAL
+static inline Datum
+Float4GetDatum(float4 X)
+{
+	union
+	{
+		float4		value;
+		int32		retval;
+	}			myunion;
 
+	myunion.value = X;
+	return SET_4_BYTES(myunion.retval);
+}
+#else
 extern Datum Float4GetDatum(float4 X);
+#endif
 
 /*
  * DatumGetFloat8
@@ -687,7 +720,18 @@ extern Datum Float4GetDatum(float4 X);
  */
 
 #ifdef USE_FLOAT8_BYVAL
-extern float8 DatumGetFloat8(Datum X);
+static inline float8
+DatumGetFloat8(Datum X)
+{
+	union
+	{
+		int64		value;
+		float8		retval;
+	}			myunion;
+
+	myunion.value = GET_8_BYTES(X);
+	return myunion.retval;
+}
 #else
 #define DatumGetFloat8(X) (* ((float8 *) DatumGetPointer(X)))
 #endif
@@ -700,7 +744,22 @@ extern float8 DatumGetFloat8(Datum X);
  * to palloc'd space.
  */
 
+#ifdef USE_FLOAT8_BYVAL
+static inline Datum
+Float8GetDatum(float8 X)
+{
+	union
+	{
+		float8		value;
+		int64		retval;
+	}			myunion;
+
+	myunion.value = X;
+	return SET_8_BYTES(myunion.retval);
+}
+#else
 extern Datum Float8GetDatum(float8 X);
+#endif
 
 
 /*
-- 
2.9.3

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Heikki Linnakangas (#1)
Re: Use static inline functions for Float <-> Datum conversions

Heikki Linnakangas <hlinnaka@iki.fi> writes:

Now that we are OK with static inline functions, we can save some cycles
from floating-point functions, by turning Float4GetDatum,
Float8GetDatum, and DatumGetFloat8 into static inlines.

Looks good to me.

I wonder whether there is a compiler-dependent way of avoiding the union
trick ... or maybe gcc is already smart enough that it doesn't matter?

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#3Heikki Linnakangas
hlinnaka@iki.fi
In reply to: Tom Lane (#2)
Re: Use static inline functions for Float <-> Datum conversions

On 08/31/2016 02:38 PM, Tom Lane wrote:

Heikki Linnakangas <hlinnaka@iki.fi> writes:

Now that we are OK with static inline functions, we can save some cycles
from floating-point functions, by turning Float4GetDatum,
Float8GetDatum, and DatumGetFloat8 into static inlines.

Looks good to me.

Ok, will push.

I wonder whether there is a compiler-dependent way of avoiding the union
trick ... or maybe gcc is already smart enough that it doesn't matter?

It seems to compile into a single instruction, so it can't get any
better from a performance point of view.

float8pl:
.LFB79:
.loc 1 871 0
.cfi_startproc
.LVL297:
.LBB959:
.LBB960:
.loc 2 733 0
movsd 40(%rdi), %xmm2
.LBE960:
.LBE959:
.LBB961:
.LBB962:
movsd 32(%rdi), %xmm1
...

A union is probably what language pedantics would prefer anyway, and
anything else would be more of a trick.

- Heikki

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#4Tom Lane
tgl@sss.pgh.pa.us
In reply to: Heikki Linnakangas (#3)
Re: Use static inline functions for Float <-> Datum conversions

Heikki Linnakangas <hlinnaka@iki.fi> writes:

On 08/31/2016 02:38 PM, Tom Lane wrote:

I wonder whether there is a compiler-dependent way of avoiding the union
trick ... or maybe gcc is already smart enough that it doesn't matter?

It seems to compile into a single instruction, so it can't get any
better from a performance point of view.

Yeah, confirmed here. On my not-real-new gcc (version 4.4.7, which
ships with RHEL6), these test functions:

Datum
compare_int8(PG_FUNCTION_ARGS)
{
int64 x = PG_GETARG_INT64(0);
int64 y = PG_GETARG_INT64(1);

PG_RETURN_BOOL(x < y);
}

Datum
compare_float8(PG_FUNCTION_ARGS)
{
double x = PG_GETARG_FLOAT8(0);
double y = PG_GETARG_FLOAT8(1);

PG_RETURN_BOOL(x < y);
}

compile into this (at -O2):

compare_int8:
.cfi_startproc
movq 40(%rdi), %rax
cmpq %rax, 32(%rdi)
setl %al
movzbl %al, %eax
ret
.cfi_endproc

compare_float8:
.cfi_startproc
movsd 40(%rdi), %xmm0
xorl %eax, %eax
ucomisd 32(%rdi), %xmm0
seta %al
ret
.cfi_endproc

(Not sure why the compiler does the widening of the comparison result
differently, but it doesn't look like it matters.) Before this patch,
that looked like:

compare_float8:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movq %rdi, %rbx
subq $16, %rsp
.cfi_def_cfa_offset 32
movq 32(%rdi), %rdi
call DatumGetFloat8
movq 40(%rbx), %rdi
movsd %xmm0, 8(%rsp)
call DatumGetFloat8
xorl %eax, %eax
ucomisd 8(%rsp), %xmm0
seta %al
addq $16, %rsp
.cfi_def_cfa_offset 16
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc

Nice.

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers