From 0cdc87640cf2a2d8c946aff1669706f99280c555 Mon Sep 17 00:00:00 2001
From: David Geier <geidav.pg@gmail.com>
Date: Mon, 10 Nov 2025 14:40:37 +0100
Subject: [PATCH v2 4/8] Avoid dedup and sort in ginExtractEntries

---
 contrib/pg_trgm/trgm_gin.c       |  2 ++
 doc/src/sgml/gin.sgml            |  7 ++++++-
 src/backend/access/gin/ginutil.c | 32 +++++++++++++++++++-------------
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 66ff6adde99..862e650efec 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -36,10 +36,12 @@ gin_extract_value_trgm(PG_FUNCTION_ARGS)
 {
 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
+	bool	   *uniqueAndSorted = (bool *) PG_GETARG_POINTER(3);
 	Datum	   *entries = NULL;
 	TRGM	   *trg;
 	int32		trglen;
 
+	*uniqueAndSorted = true;
 	*nentries = 0;
 
 	trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
index 82410b1fbdf..b96478731f8 100644
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -167,7 +167,7 @@
   <variablelist>
     <varlistentry>
      <term><function>Datum *extractValue(Datum itemValue, int32 *nkeys,
-        bool **nullFlags)</function></term>
+        bool **nullFlags, bool *uniqueAndSorted)</function></term>
      <listitem>
       <para>
        Returns a palloc'd array of keys given an item to be indexed.  The
@@ -177,6 +177,11 @@
        <literal>*nullFlags</literal>, and set these null flags as needed.
        <literal>*nullFlags</literal> can be left <symbol>NULL</symbol> (its initial value)
        if all keys are non-null.
+       If the returned keys contain no duplicates and are sorted according to the
+       comparison function of the operator class, store <symbol>true</symbol> in
+       <literal>*uniqueAndSorted</literal>. <literal>*uniqueAndSorted</literal> can be left
+       <symbol>false</symbol> (its initial value) if the keys are unsorted or contain
+       duplicates. In that case, GIN sorts the keys and removes duplicates itself.
        The return value can be <symbol>NULL</symbol> if the item contains no keys.
       </para>
      </listitem>
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 75a18f457bc..22b588483d0 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -464,6 +464,7 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 {
 	Datum	   *entries;
 	bool	   *nullFlags;
+	bool		uniqueAndSorted = false;
 	int32		i;
 
 	/*
@@ -483,11 +484,12 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 	/* OK, call the opclass's extractValueFn */
 	nullFlags = NULL;			/* in case extractValue doesn't set it */
 	entries = (Datum *)
-		DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
+		DatumGetPointer(FunctionCall4Coll(&ginstate->extractValueFn[attnum - 1],
 										  ginstate->supportCollation[attnum - 1],
 										  value,
 										  PointerGetDatum(nentries),
-										  PointerGetDatum(&nullFlags)));
+										  PointerGetDatum(&nullFlags),
+										  PointerGetDatum(&uniqueAndSorted)));
 
 	/*
 	 * Generate a placeholder if the item contained no keys.
@@ -502,13 +504,6 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 		return entries;
 	}
 
-	/*
-	 * If the extractValueFn didn't create a nullFlags array, create one,
-	 * assuming that everything's non-null.
-	 */
-	if (nullFlags == NULL)
-		nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
-
 	/*
 	 * If there's more than one key, sort and unique-ify.
 	 *
@@ -516,11 +511,18 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 	 * pretty bad too.  For small numbers of keys it'd likely be better to use
 	 * a simple insertion sort.
 	 */
-	if (*nentries > 1)
+	if (*nentries > 1 && !uniqueAndSorted)
 	{
 		keyEntryData *keydata;
 		cmpEntriesArg arg;
 
+		/*
+		 * If the extractValueFn didn't create a nullFlags array, create one,
+		 * assuming that everything's non-null.
+		 */
+		if (nullFlags == NULL)
+			nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
+
 		keydata = palloc_array(keyEntryData, *nentries);
 		for (i = 0; i < *nentries; i++)
 		{
@@ -568,9 +570,13 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 	/*
 	 * Create GinNullCategory representation from nullFlags.
 	 */
-	*categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory));
-	for (i = 0; i < *nentries; i++)
-		(*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY);
+	StaticAssertStmt(GIN_CAT_NORM_KEY == 0, "Assuming GIN_CAT_NORM_KEY is 0");
+	*categories = palloc0_array(GinNullCategory, *nentries);
+
+	if (nullFlags != NULL)
+		for (i = 0; i < *nentries; i++)
+			if (nullFlags[i])
+				(*categories)[i] = GIN_CAT_NULL_KEY;
 
 	return entries;
 }
-- 
2.51.0

