From 48493d8806f11e92b5d585b217e0e13b69ecdeb0 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <peter.geoghegan86@gmail.com>
Date: Fri, 3 Jul 2015 13:48:08 -0700
Subject: [PATCH 1/3] Use unsigned integer abbreviated keys for text

Add DatumToLittleEndian() macro to allow string-like types to represent
abbreviated keys as unsigned integers.  DatumToLittleEndian() performs a
byteswap on little-endian platforms only; on big-endian platforms it is
a no-op, since byte order and integer significance already agree there.

Using the new macro, arrange for text abbreviated keys to be represented
such that a simple 3-way unsigned integer comparison of two keys gives
the same answer as memcmp() on the original bytes.  Replace the
memcmp()-based comparator, bttextcmp_abbrev(), with one that performs
that integer comparison.  This is faster on at least some platforms,
since unsigned integer comparisons are now used rather than memcmp()
during sorting proper.
---
 src/backend/utils/adt/varlena.c | 28 +++++++++++++++++++---------
 src/include/port/pg_bswap.h     | 22 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2fbbf54..fadd827 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -26,6 +26,7 @@
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "parser/scansup.h"
+#include "port/pg_bswap.h"
 #include "regex/regex.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
@@ -1967,25 +1968,25 @@ done:
 static int
 bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
 {
-	char	   *a = (char *) &x;
-	char	   *b = (char *) &y;
-	int			result;
-
-	result = memcmp(a, b, sizeof(Datum));
-
 	/*
-	 * When result = 0, the core system will call bttextfastcmp_c() or
+	 * When 0 is returned, the core system will call bttextfastcmp_c() or
 	 * bttextfastcmp_locale().  Even a strcmp() on two non-truncated strxfrm()
 	 * blobs cannot indicate *equality* authoritatively, for the same reason
 	 * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
 	 */
-	return result;
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
 }
 
 /*
  * Conversion routine for sortsupport.  Converts original text to abbreviated
  * key representation.  Our encoding strategy is simple -- pack the first 8
- * bytes of a strxfrm() blob into a Datum.
+ * bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8
+ * bytes are stored in reverse order), and treat it as an unsigned integer.
  */
 static Datum
 bttext_abbrev_convert(Datum original, SortSupport ssup)
@@ -2104,6 +2105,15 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 	addHyperLogLog(&tss->abbr_card, hash);
 
+	/*
+	 * Byteswap on little-endian machines.
+	 *
+	 * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
+	 * comparator) works correctly on all platforms.  If this were skipped,
+	 * the comparator would have to call memcmp() with a pair of pointers to
+	 * the first byte of each abbreviated key, which is slower.
+	 */
+	res = DatumToLittleEndian(res);
 	/* Don't leak memory here */
 	if (PointerGetDatum(authoritative) != original)
 		pfree(authoritative);
diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h
index 6555942..ac060ea 100644
--- a/src/include/port/pg_bswap.h
+++ b/src/include/port/pg_bswap.h
@@ -43,4 +43,26 @@
 					((x >> 56) & 0x00000000000000ffUL))
 #endif	/* HAVE__BUILTIN_BSWAP64 */
 
+/*
+ * Byteswap a Datum on little-endian machines, so that the byte stored first
+ * in memory becomes the most significant byte of the resulting integer.  On
+ * big-endian machines, this does nothing at all.  Note that the C type Datum
+ * is an unsigned integer type on all platforms.
+ *
+ * One possible application of the DatumToLittleEndian() macro is to make
+ * bitwise comparisons cheaper.  A simple 3-way comparison of Datums
+ * transformed by the macro (based on native, unsigned comparisons) will return
+ * the same result as a memcmp() of the corresponding original Datums, but can
+ * be much cheaper.  Big-endian systems need no transformation for this.
+ */
+#ifdef WORDS_BIGENDIAN
+#define		DatumToLittleEndian(x)	(x)
+#else											/* !WORDS_BIGENDIAN */
+#if SIZEOF_DATUM == 8
+#define		DatumToLittleEndian(x)	BSWAP64(x)
+#else											/* SIZEOF_DATUM != 8 */
+#define		DatumToLittleEndian(x)	BSWAP32(x)
+#endif											/* SIZEOF_DATUM == 8 */
+#endif											/* WORDS_BIGENDIAN */
+
 #endif   /* PG_BSWAP_H */
-- 
1.9.1

