[PATCH] Move clause_sides_match_join() into pathnode.h
We had two almost-identical copies of the utility function
clause_sides_match_join() -- one in joinpath.c, and one in
analyzejoins.c. Both copies were marked "inline," so we might as well
just move the (inline) function definition into a common header file.
I chose pathnode.h, because it is already #included by both of the .c
files.
This change doesn't help very much, on its own (since the
almost-duplicate functions are both "inline"), but it allows us to use
the same utility function from other source files, without making a
third copy.
James
Attachments:
0001-Move-clause_sides_match_join-into-pathnode.h.patchapplication/octet-stream; name=0001-Move-clause_sides_match_join-into-pathnode.h.patchDownload
From de406423a8814882dc1f747c3bc0d892ec440c65 Mon Sep 17 00:00:00 2001
From: James Hunter <james.hunter.pg@gmail.com>
Date: Wed, 9 Oct 2024 19:13:55 +0000
Subject: [PATCH] Move clause_sides_match_join() into pathnode.h
We had two almost-identical copies of the utility function
clause_sides_match_join() -- one in joinpath.c, and one in analyzejoins.c.
Both copies were marked "inline," so we might as well just move the
(inline) function definition into a common header file. I chose
pathnode.h, because it is already #included by both of the .c files.
This change doesn't help very much, on its own (since the almost-duplicate
functions are both "inline"), but it allows us to use the same utility
function from other source files, without making a third copy.
---
src/backend/optimizer/path/joinpath.c | 43 ++++-------------------
src/backend/optimizer/plan/analyzejoins.c | 31 ----------------
src/include/optimizer/pathnode.h | 31 ++++++++++++++++
3 files changed, 37 insertions(+), 68 deletions(-)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index a244300463..f1315606b8 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -58,9 +58,6 @@ static void try_partial_mergejoin_path(PlannerInfo *root,
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
-static inline bool clause_sides_match_join(RestrictInfo *rinfo,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
@@ -470,7 +467,8 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
* with 2 args.
*/
if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
- !clause_sides_match_join(rinfo, outerrel, innerrel))
+ !clause_sides_match_join(rinfo, outerrel->relids,
+ innerrel->relids))
{
list_free(*operators);
list_free(*param_exprs);
@@ -1320,37 +1318,6 @@ try_partial_hashjoin_path(PlannerInfo *root,
hashclauses));
}
-/*
- * clause_sides_match_join
- * Determine whether a join clause is of the right form to use in this join.
- *
- * We already know that the clause is a binary opclause referencing only the
- * rels in the current join. The point here is to check whether it has the
- * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
- * rather than mixing outer and inner vars on either side. If it matches,
- * we set the transient flag outer_is_left to identify which side is which.
- */
-static inline bool
-clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
- RelOptInfo *innerrel)
-{
- if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
- bms_is_subset(rinfo->right_relids, innerrel->relids))
- {
- /* lefthand side is outer */
- rinfo->outer_is_left = true;
- return true;
- }
- else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
- bms_is_subset(rinfo->right_relids, outerrel->relids))
- {
- /* righthand side is outer */
- rinfo->outer_is_left = false;
- return true;
- }
- return false; /* no good for these input relations */
-}
-
/*
* sort_inner_and_outer
* Create mergejoin join paths by explicitly sorting both the outer and
@@ -2264,7 +2231,8 @@ hash_inner_and_outer(PlannerInfo *root,
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
- if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ innerrel->relids))
continue; /* no good for these input relations */
/*
@@ -2549,7 +2517,8 @@ select_mergejoin_clauses(PlannerInfo *root,
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
- if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ innerrel->relids))
{
have_nonmergeable_joinclause = true;
continue; /* no good for these input relations */
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 928d926645..5bc16c4bfc 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -115,37 +115,6 @@ restart:
return joinlist;
}
-/*
- * clause_sides_match_join
- * Determine whether a join clause is of the right form to use in this join.
- *
- * We already know that the clause is a binary opclause referencing only the
- * rels in the current join. The point here is to check whether it has the
- * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
- * rather than mixing outer and inner vars on either side. If it matches,
- * we set the transient flag outer_is_left to identify which side is which.
- */
-static inline bool
-clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
- Relids innerrelids)
-{
- if (bms_is_subset(rinfo->left_relids, outerrelids) &&
- bms_is_subset(rinfo->right_relids, innerrelids))
- {
- /* lefthand side is outer */
- rinfo->outer_is_left = true;
- return true;
- }
- else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
- bms_is_subset(rinfo->right_relids, outerrelids))
- {
- /* righthand side is outer */
- rinfo->outer_is_left = false;
- return true;
- }
- return false; /* no good for these input relations */
-}
-
/*
* join_is_removable
* Check whether we need not perform this special join at all, because
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 1035e6560c..ccedda5103 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -18,6 +18,37 @@
#include "nodes/pathnodes.h"
+/*
+ * clause_sides_match_join
+ * Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join. The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side. If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+ Relids innerrelids)
+{
+ if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+ bms_is_subset(rinfo->right_relids, innerrelids))
+ {
+ /* lefthand side is outer */
+ rinfo->outer_is_left = true;
+ return true;
+ }
+ else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+ bms_is_subset(rinfo->right_relids, outerrelids))
+ {
+ /* righthand side is outer */
+ rinfo->outer_is_left = false;
+ return true;
+ }
+ return false; /* no good for these input relations */
+}
+
/*
* prototypes for pathnode.c
*/
--
2.40.1
On Thu, 10 Oct 2024 at 08:38, James Hunter <james.hunter.pg@gmail.com> wrote:
We had two almost-identical copies of the utility function
clause_sides_match_join() -- one in joinpath.c, and one in
analyzejoins.c. Both copies were marked "inline," so we might as well
just move the (inline) function definition into a common header file.
I chose pathnode.h, because it is already #included by both of the .c
files.
I'm in favour of the deduplication. pathnode.h seems like a strange
choice. restrictinfo.h seems more suited.
(I really wonder how much the inlining is giving us given that the
function itself calls other non-inlineable functions)
David
On Wed, Oct 9, 2024 at 5:26 PM David Rowley <dgrowleyml@gmail.com> wrote:
On Thu, 10 Oct 2024 at 08:38, James Hunter <james.hunter.pg@gmail.com> wrote:
We had two almost-identical copies of the utility function
clause_sides_match_join() -- one in joinpath.c, and one in
analyzejoins.c. Both copies were marked "inline," so we might as well
just move the (inline) function definition into a common header file.
I chose pathnode.h, because it is already #included by both of the .c
files.I'm in favour of the deduplication. pathnode.h seems like a strange
choice. restrictinfo.h seems more suited.
Moved into restrictinfo.h, instead, in next revision. (The only reason
I initially chose pathnode.h is because it was already #included by
the two .c files; I agree, after glancing over restrictinfo.h, that it
seems like a better fit.)
(I really wonder how much the inlining is giving us given that the
function itself calls other non-inlineable functions)
I wondered the same! But, at least the branch can be inlined?
Thanks for looking over the patch,
James
Attachments:
0001-Move-clause_sides_match_join-into-restrictinfo.h.patchapplication/octet-stream; name=0001-Move-clause_sides_match_join-into-restrictinfo.h.patchDownload
From b6b8212a85d2b446dc6fc788c18f7ea3bac425f2 Mon Sep 17 00:00:00 2001
From: James Hunter <james.hunter.pg@gmail.com>
Date: Wed, 9 Oct 2024 19:13:55 +0000
Subject: [PATCH] Move clause_sides_match_join() into restrictinfo.h
We had two almost-identical copies of the utility function
clause_sides_match_join() -- one in joinpath.c, and one in analyzejoins.c.
Both copies were marked "inline," so we might as well just move the
(inline) function definition into a common header file, restrictinfo.h.
This change doesn't help very much, on its own (since the almost-duplicate
functions are both "inline"), but it allows us to use the same utility
function from other source files, without making a third copy.
---
src/backend/optimizer/path/joinpath.c | 44 ++++-------------------
src/backend/optimizer/plan/analyzejoins.c | 31 ----------------
src/include/optimizer/restrictinfo.h | 31 ++++++++++++++++
3 files changed, 38 insertions(+), 68 deletions(-)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index a244300463..3971138480 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -25,6 +25,7 @@
#include "optimizer/paths.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
@@ -58,9 +59,6 @@ static void try_partial_mergejoin_path(PlannerInfo *root,
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
-static inline bool clause_sides_match_join(RestrictInfo *rinfo,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
@@ -470,7 +468,8 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
* with 2 args.
*/
if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
- !clause_sides_match_join(rinfo, outerrel, innerrel))
+ !clause_sides_match_join(rinfo, outerrel->relids,
+ innerrel->relids))
{
list_free(*operators);
list_free(*param_exprs);
@@ -1320,37 +1319,6 @@ try_partial_hashjoin_path(PlannerInfo *root,
hashclauses));
}
-/*
- * clause_sides_match_join
- * Determine whether a join clause is of the right form to use in this join.
- *
- * We already know that the clause is a binary opclause referencing only the
- * rels in the current join. The point here is to check whether it has the
- * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
- * rather than mixing outer and inner vars on either side. If it matches,
- * we set the transient flag outer_is_left to identify which side is which.
- */
-static inline bool
-clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
- RelOptInfo *innerrel)
-{
- if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
- bms_is_subset(rinfo->right_relids, innerrel->relids))
- {
- /* lefthand side is outer */
- rinfo->outer_is_left = true;
- return true;
- }
- else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
- bms_is_subset(rinfo->right_relids, outerrel->relids))
- {
- /* righthand side is outer */
- rinfo->outer_is_left = false;
- return true;
- }
- return false; /* no good for these input relations */
-}
-
/*
* sort_inner_and_outer
* Create mergejoin join paths by explicitly sorting both the outer and
@@ -2264,7 +2232,8 @@ hash_inner_and_outer(PlannerInfo *root,
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
- if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ innerrel->relids))
continue; /* no good for these input relations */
/*
@@ -2549,7 +2518,8 @@ select_mergejoin_clauses(PlannerInfo *root,
/*
* Check if clause has the form "outer op inner" or "inner op outer".
*/
- if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+ if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+ innerrel->relids))
{
have_nonmergeable_joinclause = true;
continue; /* no good for these input relations */
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 928d926645..5bc16c4bfc 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -115,37 +115,6 @@ restart:
return joinlist;
}
-/*
- * clause_sides_match_join
- * Determine whether a join clause is of the right form to use in this join.
- *
- * We already know that the clause is a binary opclause referencing only the
- * rels in the current join. The point here is to check whether it has the
- * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
- * rather than mixing outer and inner vars on either side. If it matches,
- * we set the transient flag outer_is_left to identify which side is which.
- */
-static inline bool
-clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
- Relids innerrelids)
-{
- if (bms_is_subset(rinfo->left_relids, outerrelids) &&
- bms_is_subset(rinfo->right_relids, innerrelids))
- {
- /* lefthand side is outer */
- rinfo->outer_is_left = true;
- return true;
- }
- else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
- bms_is_subset(rinfo->right_relids, outerrelids))
- {
- /* righthand side is outer */
- rinfo->outer_is_left = false;
- return true;
- }
- return false; /* no good for these input relations */
-}
-
/*
* join_is_removable
* Check whether we need not perform this special join at all, because
diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h
index 1b42c832c5..440ab040a9 100644
--- a/src/include/optimizer/restrictinfo.h
+++ b/src/include/optimizer/restrictinfo.h
@@ -22,6 +22,37 @@
make_restrictinfo(root, clause, true, false, false, false, 0, \
NULL, NULL, NULL)
+/*
+ * clause_sides_match_join
+ * Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join. The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side. If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+ Relids innerrelids)
+{
+ if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+ bms_is_subset(rinfo->right_relids, innerrelids))
+ {
+ /* lefthand side is outer */
+ rinfo->outer_is_left = true;
+ return true;
+ }
+ else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+ bms_is_subset(rinfo->right_relids, outerrelids))
+ {
+ /* righthand side is outer */
+ rinfo->outer_is_left = false;
+ return true;
+ }
+ return false; /* no good for these input relations */
+}
+
extern RestrictInfo *make_restrictinfo(PlannerInfo *root,
Expr *clause,
bool is_pushed_down,
--
2.40.1
On Fri, 11 Oct 2024 at 04:25, James Hunter <james.hunter.pg@gmail.com> wrote:
Moved into restrictinfo.h, instead, in next revision.
Thanks. Pushed.
(I really wonder how much the inlining is giving us given that the
function itself calls other non-inlineable functions)I wondered the same! But, at least the branch can be inlined?
Do you mean function call? As far as I see it, that's the only
advantage. There are no branches that can be eliminated from
constant-folding.
I imagine the original version was only inlined to help coax the
compiler into inlining, which I imagine it would have done anyway if
there was just a single usage of the static function. Probably the
analyzejoins.c version just copied the original joinpath.c version. I
didn't check the history to know if that's actually true.
David