From 0aa1ff24e58234d759c07f4eeec163a82244be25 Mon Sep 17 00:00:00 2001
From: Ronan Dunklau <ronan.dunklau@aiven.io>
Date: Wed, 6 Jul 2022 17:29:01 +0200
Subject: [PATCH v1] Fix gin costing.

GIN index scans were not taking any descent CPU-based cost into account. That made
them look cheaper than other types of indexes when they shouldn't be.

We use the same heuristic as for btree indexes, but multiplying it by
the number of searched entries.

Per report of Hung Nguyen.
---
 src/backend/utils/adt/selfuncs.c | 36 +++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fa1f589fad..21407c3d38 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -7425,6 +7425,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 				qual_arg_cost,
 				spc_random_page_cost,
 				outer_scans;
+	Cost		descentCost;
 	Relation	indexRel;
 	GinStatsData ginStats;
 	ListCell   *lc;
@@ -7524,6 +7525,18 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 							  &spc_random_page_cost,
 							  NULL);
 
+
+	/*
+	 * We model index descent costs similarly to those for btree, but we also
+	 * need an idea of the tree_height.
+	 * We use numEntries / numEntryPages as the fanout factor.
+	 */
+	if (index->tree_height < 0)
+	{
+		index->tree_height = ceil(log(numEntries) / log(numEntries / numEntryPages));
+	}
+
+
 	/*
 	 * Generic assumption about index correlation: there isn't any.
 	 */
@@ -7649,6 +7662,27 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	 */
 	dataPagesFetched = ceil(numDataPages * partialScale);
 
+
+	*indexStartupCost = 0;
+
+	/*
+	 * Add a CPU-cost component similar to btree to represent the costs of the
+	 * initial descent.
+	 * We charge descentCost once for every entry
+	 */
+	if (numTuples > 1)
+	{
+		descentCost = ceil(log(numTuples) / log(2.0)) * cpu_operator_cost;
+		*indexStartupCost += descentCost * counts.searchEntries;
+	}
+
+	/*
+	 * Add a similar per-page charge, depending on the tree heights.
+	 */
+	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+	*indexStartupCost += descentCost * counts.searchEntries;
+
+
 	/*
 	 * Calculate cache effects if more than one scan due to nestloops or array
 	 * quals.  The result is pro-rated per nestloop scan, but the array qual
@@ -7672,7 +7706,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	 * Here we use random page cost because logically-close pages could be far
 	 * apart on disk.
 	 */
-	*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
+	*indexStartupCost += (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
 
 	/*
 	 * Now compute the number of data pages fetched during the scan.
-- 
2.37.0

