From 1bc7921b4ad51cf23723c2ad7c7f618cde6fcba3 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Wed, 17 Nov 2021 22:52:31 +0100
Subject: [PATCH 2/2] review

---
 .gitignore                            |  1 +
 src/backend/commands/explain.c        |  5 +++++
 src/backend/executor/nodeMergejoin.c  | 24 +++++++++++++++++++++---
 src/backend/optimizer/path/costsize.c |  8 ++++----
 src/backend/optimizer/path/joinpath.c |  9 +++++++++
 src/backend/optimizer/path/pathkeys.c |  8 ++++++++
 src/backend/utils/adt/rangetypes.c    |  4 ++++
 src/backend/utils/cache/lsyscache.c   |  2 ++
 src/include/nodes/pathnodes.h         |  4 ++--
 9 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5dca1a39ec..23a555185c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ lib*.pc
 /tmp_install/
 
 # Thomas
+# XXX Shouldn't be in the patch.
 /data/
 /server/
 .idea/
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 9b7503630c..3610048c90 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1206,6 +1206,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			pname = sname = "Nested Loop";
 			break;
 		case T_MergeJoin:
+			/*
+			 * XXX Not sure we want to add another top-level node (Merge vs. Range Merge).
+			 *
+			 * I'd use Merge for both and just differentiate them using Range Cond.
+			 */
 			if(((MergeJoin *) plan)->rangeclause)
 			{
 				pname = "Range Merge";	/* "Join" gets added by jointype switch */
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 09131479b1..d7f9ba809e 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -145,7 +145,9 @@ typedef struct MergeJoinClauseData
 }			MergeJoinClauseData;
 
 /*
- * Runtime data for the range clause
+ * Runtime data for the range clause.
+ *
+ * XXX This really needs some comments, explaining what the fields are for.
  */
 typedef struct RangeJoinData
 {
@@ -153,7 +155,6 @@ typedef struct RangeJoinData
 	ExprState *endClause;
 	ExprState *rangeExpr;
 	ExprState *elemExpr;
-
 }			RangeJoinData;
 
 /* Result type for MJEvalOuterValues and MJEvalInnerValues */
@@ -294,10 +295,14 @@ MJCreateRangeData(List *rangeclause,
 {
 	RangeData data;
 
-	Assert(list_length(node->rangeclause) < 3);
+	/* XXX how come this does not crash anything? */
+	Assert(false);
+	// FIXME, there's no 'node' variable
+	// Assert(list_length(node->rangeclause) < 3);
 
 	data = (RangeData) palloc0(sizeof(RangeJoinData));
 
+	/* XXX useless, thanks to the palloc0 */
 	data->startClause = NULL;
 	data->endClause = NULL;
 	data->rangeExpr = NULL;
@@ -518,6 +523,10 @@ MJCompare(MergeJoinState *mergestate)
  * Compare the rangejoinable values of the current two input tuples
  * and return 0 if they are equal (ie, the outer interval contains the inner),
  * >0 if outer > inner, <0 if outer < inner.
+ *
+ * XXX So this is essentially a simple comparator function, except that it
+ * also deals with ranges. That'd deserve explanation how clauses with ranges
+ * works, I guess.
  */
 static int
 MJCompareRange(MergeJoinState *mergestate)
@@ -1028,6 +1037,9 @@ ExecMergeJoin(PlanState *pstate)
 						compareResult = MJCompare(node);
 						MJ_DEBUG_COMPARE(compareResult);
 
+						/*
+						 * XXX Incomprehensible. Maybe add some comments?
+						 */
 						if(compareResult == 0)
 						{
 							if(isRangeJoin)
@@ -1237,6 +1249,9 @@ ExecMergeJoin(PlanState *pstate)
 						/* we need not do MJEvalInnerValues again */
 					}
 
+					/*
+					 * Comments?
+					 */
 					if(isRangeJoin)
 					{
 						compareRangeResult = MJCompareRange(node);
@@ -1348,6 +1363,9 @@ ExecMergeJoin(PlanState *pstate)
 
 				if (compareResult == 0)
 				{
+					/*
+					 * XXX Comments?
+					 */
 					if(isRangeJoin)
 					{
 						compareRangeResult = MJCompareRange(node);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 041331d6bf..835944661b 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3436,12 +3436,12 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 				rescannedtuples;
 	double		rescanratio;
 
+	/* XXX ??? */
 	allclauses = list_concat(list_copy(mergeclauses), path->path_rangeclause);
 
-
-    /* Protect some assumptions below that rowcounts aren't zero */
-    if (inner_path_rows <= 0)
-        inner_path_rows = 1;
+	/* Protect some assumptions below that rowcounts aren't zero */
+	if (inner_path_rows <= 0)
+		inner_path_rows = 1;
 
 	/* Mark the path with the correct row estimate */
 	if (path->jpath.path.param_info)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 4777e0a850..3f93f994a2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -222,6 +222,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 														  jointype,
 														  &mergejoin_allowed);
 
+		/* XXX Why only for inner/left joins? */
 		if (jointype == JOIN_INNER || jointype == JOIN_LEFT)
 			extra.rangeclause_list = select_rangejoin_clauses(outerrel,
 															  innerrel,
@@ -1238,6 +1239,9 @@ sort_inner_and_outer(PlannerInfo *root,
 	 * The pathkey order returned by select_outer_pathkeys_for_merge() has
 	 * some heuristics behind it (see that function), so be sure to try it
 	 * exactly as-is as well as making variants.
+	 *
+	 * XXX This comment probably needs updating? The patch significantly
+	 * reworks the following code ...
 	 */
 
 	range_pathkeys = select_outer_pathkeys_for_range(root,
@@ -1368,6 +1372,7 @@ sort_inner_and_outer(PlannerInfo *root,
 									   jointype,
 									   extra);
 
+		/* XXX comments? */
 		foreach(l, range_pathkeys)
 		{
 			PathKey		*range_outer_pathkey = (PathKey *) lfirst(l);
@@ -2442,6 +2447,8 @@ select_mergejoin_clauses(PlannerInfo *root,
 
 /*
  * range_clause_order
+ *
+ * XXX And what does this do?
  */
 static int
 range_clause_order(RestrictInfo *first,
@@ -2514,6 +2521,8 @@ range_clause_order(RestrictInfo *first,
 
 /*
  * select_rangejoin_clauses
+ *
+ * XXX And what does this do?
  */
 static List *
 select_rangejoin_clauses(RelOptInfo *outerrel,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index f82e760658..4d1c539cd1 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1217,6 +1217,8 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
 
 /*
  * initialize_rangeclause_eclasses
+ *
+ * XXX And what does this do?
  */
 void
 initialize_rangeclause_eclasses(PlannerInfo *root,
@@ -1404,6 +1406,8 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
 
 /*
  * find_rangeclauses_for_outer_pathkeys
+ *
+ * XXX And what does this do?
  */
 List *
 find_rangeclauses_for_outer_pathkeys(PlannerInfo *root,
@@ -1637,6 +1641,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
 
 /*
  * select_outer_pathkeys_for_range
+ *
+ * XXX And what does this do?
  */
 List *
 select_outer_pathkeys_for_range(PlannerInfo *root,
@@ -1767,6 +1773,8 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
 
 /*
  * make_inner_pathkey_for_range
+ *
+ * XXX And what does this do?
  */
 PathKey *
 make_inner_pathkey_for_range(PlannerInfo *root,
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index 3972480519..ad3e2933a7 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -2509,6 +2509,8 @@ range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum
 
 /*
  * Test whether range r is right of a specific element value.
+ *
+ * XXX naming seems a bit strange, all other functions here start with range_
  */
 bool
 elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
@@ -2539,6 +2541,8 @@ elem_before_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType
 
 /*
  * Test whether range r is left of a specific element value.
+ *
+ * XXX naming seems a bit strange, all other functions here start with range_
  */
 bool
 elem_after_range_internal(TypeCacheEntry *typcache, Datum val, const RangeType *r)
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index f6cc0cdb8a..136a30ed77 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -391,6 +391,8 @@ get_mergejoin_opfamilies(Oid opno)
 
 /*
  * get_rangejoin_opfamilies
+ *
+ * XXX and what does this do?
  */
 List *
 get_rangejoin_opfamilies(Oid opno)
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c7d4431f06..66840821fb 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2103,8 +2103,8 @@ typedef struct RestrictInfo
 
 	/* valid if clause is mergejoinable, else NIL */
 	List	   *mergeopfamilies;	/* opfamilies containing clause operator */
-	List	   *rangeleftopfamilies;
-	List	   *rangerightopfamilies;
+	List	   *rangeleftopfamilies;	/* comment? */
+	List	   *rangerightopfamilies;	/* comment? */
 
 	/* cache space for mergeclause processing; NULL if not yet set */
 	EquivalenceClass *left_ec;	/* EquivalenceClass containing lefthand */
-- 
2.31.1

