From a0348e8dedb9a7ee53af8f88adbba0e5aeff577d Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 9 Jan 2026 19:31:19 +0200
Subject: [PATCH 1/1] Make the deduplication in ginExtractEntries() a little
 faster

The idea is to remove NULLs from the array first, and use qsort to
deduplicate only the non-NULL items. That simplifies the comparison
function a little.
---
 src/backend/access/gin/ginutil.c | 139 +++++++++++++------------------
 src/include/access/gin_private.h |   2 +-
 2 files changed, 61 insertions(+), 80 deletions(-)

diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d205093e21d..29966b9d05b 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -387,19 +387,6 @@ GinInitMetabuffer(Buffer b)
 		((char *) metadata + sizeof(GinMetaPageData)) - (char *) page;
 }
 
-/*
- * Support for sorting key datums in ginExtractEntries
- *
- * Note: we only have to worry about null and not-null keys here;
- * ginExtractEntries never generates more than one placeholder null,
- * so it doesn't have to sort those.
- */
-typedef struct
-{
-	Datum		datum;
-	bool		isnull;
-} keyEntryData;
-
 typedef struct
 {
 	FmgrInfo   *cmpDatumFunc;
@@ -410,24 +397,14 @@ typedef struct
 static int
 cmpEntries(const void *a, const void *b, void *arg)
 {
-	const keyEntryData *aa = (const keyEntryData *) a;
-	const keyEntryData *bb = (const keyEntryData *) b;
+	const Datum *aa = (const Datum *) a;
+	const Datum *bb = (const Datum *) b;
 	cmpEntriesArg *data = (cmpEntriesArg *) arg;
 	int			res;
 
-	if (aa->isnull)
-	{
-		if (bb->isnull)
-			res = 0;			/* NULL "=" NULL */
-		else
-			res = 1;			/* NULL ">" not-NULL */
-	}
-	else if (bb->isnull)
-		res = -1;				/* not-NULL "<" NULL */
-	else
-		res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
-											  data->collation,
-											  aa->datum, bb->datum));
+	res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
+										  data->collation,
+										  *aa, *bb));
 
 	/*
 	 * Detect if we have any duplicates.  If there are equal keys, qsort must
@@ -450,11 +427,13 @@ cmpEntries(const void *a, const void *b, void *arg)
 Datum *
 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 				  Datum value, bool isNull,
-				  int32 *nentries, GinNullCategory **categories)
+				  int32 *nentries_p, GinNullCategory **categories_p)
 {
 	Datum	   *entries;
 	bool	   *nullFlags;
-	int32		i;
+	GinNullCategory *categories;
+	bool		hasNull;
+	int32		nentries;
 
 	/*
 	 * We don't call the extractValueFn on a null item.  Instead generate a
@@ -462,42 +441,60 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 	 */
 	if (isNull)
 	{
-		*nentries = 1;
+		*nentries_p = 1;
 		entries = palloc_object(Datum);
 		entries[0] = (Datum) 0;
-		*categories = palloc_object(GinNullCategory);
-		(*categories)[0] = GIN_CAT_NULL_ITEM;
+		*categories_p = palloc_object(GinNullCategory);
+		(*categories_p)[0] = GIN_CAT_NULL_ITEM;
 		return entries;
 	}
 
 	/* OK, call the opclass's extractValueFn */
 	nullFlags = NULL;			/* in case extractValue doesn't set it */
+	nentries = 0;
 	entries = (Datum *)
 		DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
 										  ginstate->supportCollation[attnum - 1],
 										  value,
-										  PointerGetDatum(nentries),
+										  PointerGetDatum(&nentries),
 										  PointerGetDatum(&nullFlags)));
 
 	/*
 	 * Generate a placeholder if the item contained no keys.
 	 */
-	if (entries == NULL || *nentries <= 0)
+	if (entries == NULL || nentries <= 0)
 	{
-		*nentries = 1;
+		*nentries_p = 1;
 		entries = palloc_object(Datum);
 		entries[0] = (Datum) 0;
-		*categories = palloc_object(GinNullCategory);
-		(*categories)[0] = GIN_CAT_EMPTY_ITEM;
+		*categories_p = palloc_object(GinNullCategory);
+		(*categories_p)[0] = GIN_CAT_EMPTY_ITEM;
 		return entries;
 	}
 
 	/*
-	 * If the extractValueFn didn't create a nullFlags array, create one,
-	 * assuming that everything's non-null.
+	 * Scan the items for any NULLs.  All NULLs are considered equal, so we
+	 * just need to check and remember if there are any.  We remove them from
+	 * the array here, and if necessary, put back one NULL entry to represent
+	 * them all after deduplication.
 	 */
-	if (nullFlags == NULL)
-		nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
+	hasNull = false;
+	if (nullFlags)
+	{
+		int32		numNonNulls = 0;
+
+		for (int32 i = 0; i < nentries; i++)
+		{
+			if (nullFlags[i])
+				hasNull = true;
+			else
+			{
+				entries[numNonNulls] = entries[i];
+				numNonNulls++;
+			}
+		}
+		nentries = numNonNulls;
+	}
 
 	/*
 	 * If there's more than one key, sort and unique-ify.
@@ -506,63 +503,47 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 	 * pretty bad too.  For small numbers of keys it'd likely be better to use
 	 * a simple insertion sort.
 	 */
-	if (*nentries > 1)
+	if (nentries > 1)
 	{
-		keyEntryData *keydata;
 		cmpEntriesArg arg;
 
-		keydata = palloc_array(keyEntryData, *nentries);
-		for (i = 0; i < *nentries; i++)
-		{
-			keydata[i].datum = entries[i];
-			keydata[i].isnull = nullFlags[i];
-		}
-
 		arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
 		arg.collation = ginstate->supportCollation[attnum - 1];
 		arg.haveDups = false;
-		qsort_arg(keydata, *nentries, sizeof(keyEntryData),
+		qsort_arg(entries, nentries, sizeof(Datum),
 				  cmpEntries, &arg);
 
 		if (arg.haveDups)
 		{
 			/* there are duplicates, must get rid of 'em */
-			int32		j;
+			int32		j = 1;
 
-			entries[0] = keydata[0].datum;
-			nullFlags[0] = keydata[0].isnull;
-			j = 1;
-			for (i = 1; i < *nentries; i++)
+			for (int32 i = 1; i < nentries; i++)
 			{
-				if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
-				{
-					entries[j] = keydata[i].datum;
-					nullFlags[j] = keydata[i].isnull;
-					j++;
-				}
+				if (cmpEntries(&entries[i - 1], &entries[i], &arg) != 0)
+					entries[j++] = entries[i];
 			}
-			*nentries = j;
+			nentries = j;
 		}
-		else
-		{
-			/* easy, no duplicates */
-			for (i = 0; i < *nentries; i++)
-			{
-				entries[i] = keydata[i].datum;
-				nullFlags[i] = keydata[i].isnull;
-			}
-		}
-
-		pfree(keydata);
 	}
 
 	/*
-	 * Create GinNullCategory representation from nullFlags.
+	 * Create GinNullCategory representation.
 	 */
-	*categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory));
-	for (i = 0; i < *nentries; i++)
-		(*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY);
+	categories = (GinNullCategory *) palloc((nentries + (hasNull ? 1 : 0)) * sizeof(GinNullCategory));
+	for (int32 i = 0; i < nentries; i++)
+		categories[i] = GIN_CAT_NORM_KEY;
+
+	/* Put back a NULL entry, if there were any */
+	if (hasNull)
+	{
+		entries[nentries] = (Datum) 0;
+		categories[nentries] = GIN_CAT_NULL_KEY;
+		nentries++;
+	}
 
+	*nentries_p = nentries;
+	*categories_p = categories;
 	return entries;
 }
 
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index e155045ce8a..1e97e6cb3c9 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -99,7 +99,7 @@ extern void GinInitPage(Page page, uint32 f, Size pageSize);
 extern void GinInitMetabuffer(Buffer b);
 extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 								Datum value, bool isNull,
-								int32 *nentries, GinNullCategory **categories);
+								int32 *nentries_p, GinNullCategory **categories_p);
 
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
-- 
2.47.3

