From 95db6c962ff7ab29987379748d6c61c6aa799d7c Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Tue, 2 Apr 2024 16:01:05 +0800
Subject: [PATCH v1 8/8] a branch of updates around JoinPairInfo

1. rename rels to relids while the "rels" may reference to list of
RelOptInfo or Relids. but the later one reference to Relids all the
time.

2. Store RestrictInfo to JoinPairInfo.clauses so that we can reuse
the left_relids, right_relids which will save us from calling
pull_varnos.

3. create bms_nth_member function in bitmapset.c and use it
extract_relation_info, the function name is self-documented.

4. pfree the JoinPairInfo array when we are done with that.
---
 src/backend/nodes/bitmapset.c           | 18 ++++++++++++
 src/backend/statistics/extended_stats.c | 37 ++++++++++++-------------
 src/backend/statistics/mcv.c            | 34 ++++++-----------------
 src/include/nodes/bitmapset.h           |  1 +
 4 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index cd05c642b0..7c1291ae64 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -772,6 +772,24 @@ bms_num_members(const Bitmapset *a)
 	return result;
 }
 
+/*
+ * bms_nth_member - return the nth member, index starts with 0.
+ */
+int
+bms_nth_member(const Bitmapset *a, int i)
+{
+	int idx, res = -1;
+
+	for (idx = 0; idx <= i; idx++)
+	{
+		res = bms_next_member(a, res);
+
+		if (res < 0)
+			elog(ERROR, "no enough members for %d", i);
+	}
+	return res;
+}
+
 /*
  * bms_membership - does a set have zero, one, or multiple members?
  *
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 109fe5a04a..8d17fd91f0 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -2965,11 +2965,11 @@ statext_try_join_estimates(PlannerInfo *root, List *clauses, int varRelid,
 }
 
 /*
- * Information about two joined relations, along with the join clauses between.
+ * Information about two joined relations, group by clauses by relids.
  */
 typedef struct JoinPairInfo
 {
-	Bitmapset  *rels;
+	Bitmapset  *relids;
 	List	   *clauses;
 } JoinPairInfo;
 
@@ -3032,9 +3032,9 @@ statext_build_join_pairs(PlannerInfo *root, List *clauses,
 		found = false;
 		for (i = 0; i < cnt; i++)
 		{
-			if (bms_is_subset(rinfo->clause_relids, info[i].rels))
+			if (bms_is_subset(rinfo->clause_relids, info[i].relids))
 			{
-				info[i].clauses = lappend(info[i].clauses, clause);
+				info[i].clauses = lappend(info[i].clauses, rinfo);
 				found = true;
 				break;
 			}
@@ -3042,14 +3042,17 @@ statext_build_join_pairs(PlannerInfo *root, List *clauses,
 
 		if (!found)
 		{
-			info[cnt].rels = rinfo->clause_relids;
-			info[cnt].clauses = lappend(info[cnt].clauses, clause);
+			info[cnt].relids = rinfo->clause_relids;
+			info[cnt].clauses = lappend(info[cnt].clauses, rinfo);
 			cnt++;
 		}
 	}
 
 	if (cnt == 0)
+	{
+		pfree(info);
 		return NULL;
+	}
 
 	*npairs = cnt;
 	return info;
@@ -3069,7 +3072,6 @@ static RelOptInfo *
 extract_relation_info(PlannerInfo *root, JoinPairInfo *info, int index,
 					  StatisticExtInfo **stat)
 {
-	int			k;
 	int			relid;
 	RelOptInfo *rel;
 	ListCell   *lc;
@@ -3079,16 +3081,7 @@ extract_relation_info(PlannerInfo *root, JoinPairInfo *info, int index,
 
 	Assert((index >= 0) && (index <= 1));
 
-	k = -1;
-	while (index >= 0)
-	{
-		k = bms_next_member(info->rels, k);
-		if (k < 0)
-			elog(ERROR, "failed to extract relid");
-
-		relid = k;
-		index--;
-	}
+	relid = bms_nth_member(info->relids, index);
 
 	rel = find_base_rel(root, relid);
 
@@ -3100,7 +3093,8 @@ extract_relation_info(PlannerInfo *root, JoinPairInfo *info, int index,
 	foreach (lc, info->clauses)
 	{
 		ListCell *lc2;
-		Node *clause = (Node *) lfirst(lc);
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+		Node *clause = (Node *) rinfo->clause;
 		OpExpr *opclause = (OpExpr *) clause;
 
 		/* only opclauses supported for now */
@@ -3140,7 +3134,8 @@ extract_relation_info(PlannerInfo *root, JoinPairInfo *info, int index,
 			 * compatible because we already checked it when building the
 			 * join pairs.
 			 */
-			varnos = pull_varnos(root, arg);
+			varnos = list_cell_number(opclause->args, lc2) == 0 ?
+				rinfo->left_relids : rinfo->right_relids;
 
 			if (relid == bms_singleton_member(varnos))
 				exprs = lappend(exprs, arg);
@@ -3381,7 +3376,8 @@ statext_clauselist_join_selectivity(PlannerInfo *root, List *clauses,
 		 */
 		foreach (lc, info->clauses)
 		{
-			Node *clause = (Node *) lfirst(lc);
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+			Node *clause = (Node *) rinfo->clause;
 			ListCell *lc2;
 
 			listidx = -1;
@@ -3403,5 +3399,6 @@ statext_clauselist_join_selectivity(PlannerInfo *root, List *clauses,
 		}
 	}
 
+	pfree(info);
 	return s;
 }
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 49299ed907..53b481a291 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -2214,8 +2214,7 @@ mcv_combine_extended(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 
 	MCVList    *mcv1,
 			   *mcv2;
-	int			idx,
-				i,
+	int			i,
 				j;
 	Selectivity s = 0;
 
@@ -2306,25 +2305,14 @@ mcv_combine_extended(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 	indexes2 = (int *) palloc(sizeof(int) * list_length(clauses));
 	reverse = (bool *) palloc(sizeof(bool) * list_length(clauses));
 
-	idx = 0;
 	foreach (lc, clauses)
 	{
-		Node	   *clause = (Node *) lfirst(lc);
+		RestrictInfo	*rinfo = (RestrictInfo *) lfirst(lc);
+		Node	   *clause = (Node *) rinfo->clause;
 		OpExpr	   *opexpr;
 		Node	   *expr1,
 				   *expr2;
-		Bitmapset  *relids1,
-				   *relids2;
-
-		/*
-		 * Strip the RestrictInfo node, get the actual clause.
-		 *
-		 * XXX Not sure if we need to care about removing other node types
-		 * too (e.g. RelabelType etc.). statext_is_supported_join_clause
-		 * matches this, but maybe we need to relax it?
-		 */
-		if (IsA(clause, RestrictInfo))
-			clause = (Node *) ((RestrictInfo *) clause)->clause;
+		int		idx = list_cell_number(clauses, lc);
 
 		opexpr = (OpExpr *) clause;
 
@@ -2338,12 +2326,8 @@ mcv_combine_extended(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 		expr1 = linitial(opexpr->args);
 		expr2 = lsecond(opexpr->args);
 
-		/* determine order of clauses (rel1 op rel2) or (rel2 op rel1) */
-		relids1 = pull_varnos(root, expr1);
-		relids2 = pull_varnos(root, expr2);
-
-		if ((bms_singleton_member(relids1) == rel1->relid) &&
-			(bms_singleton_member(relids2) == rel2->relid))
+		if ((bms_singleton_member(rinfo->left_relids) == rel1->relid) &&
+			(bms_singleton_member(rinfo->right_relids) == rel2->relid))
 		{
 			Oid		collid;
 
@@ -2358,8 +2342,8 @@ mcv_combine_extended(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			exprs1 = lappend(exprs1, expr1);
 			exprs2 = lappend(exprs2, expr2);
 		}
-		else if ((bms_singleton_member(relids2) == rel1->relid) &&
-				 (bms_singleton_member(relids1) == rel2->relid))
+		else if ((bms_singleton_member(rinfo->right_relids) == rel1->relid) &&
+				 (bms_singleton_member(rinfo->left_relids) == rel2->relid))
 		{
 			Oid		collid;
 
@@ -2383,8 +2367,6 @@ mcv_combine_extended(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 
 		Assert((indexes2[idx] >= 0) &&
 			   (indexes2[idx] < bms_num_members(stat2->keys) + list_length(stat2->exprs)));
-
-		idx++;
 	}
 
 	/*
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 283bea5ea9..8d32e7a244 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -110,6 +110,7 @@ extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
 extern int	bms_singleton_member(const Bitmapset *a);
 extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
 extern int	bms_num_members(const Bitmapset *a);
+extern int  bms_nth_member(const Bitmapset *a, int i);
 
 /* optimized tests when we don't need to know exact membership count: */
 extern BMS_Membership bms_membership(const Bitmapset *a);
-- 
2.34.1

