Inconsistency between try_mergejoin_path and create_mergejoin_plan

Started by Alexander Pyhalovover 1 year ago11 messages
#1Alexander Pyhalov
a.pyhalov@postgrespro.ru
1 attachment(s)

Hi.

There's the following inconsistency between try_mergejoin_path() and
create_mergejoin_plan().
When clause operator has no commutator, we can end up with mergejoin
path.
Later create_mergejoin_plan() will call get_switched_clauses(). This
function can error out with

ERROR: could not find commutator for operator XXX

The similar behavior seems to be present also for hash join.

Attaching a test case (in patch) and a possible fix.

--
Best regards,
Alexander Pyhalov,
Postgres Professional

Attachments:

0001-Merge-and-hash-joins-need-more-checks-with-custom-op.patchtext/x-diff; name=0001-Merge-and-hash-joins-need-more-checks-with-custom-op.patchDownload
From ea9497c9f62b3613482d5b267c7907070ba8fcd4 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Mon, 17 Jun 2024 10:33:10 +0300
Subject: [PATCH] Merge and hash joins need more checks with custom operators

---
 src/backend/optimizer/path/joinpath.c    | 46 +++++++++++++++++++++++-
 src/test/regress/expected/equivclass.out | 45 +++++++++++++++++++++++
 src/test/regress/sql/equivclass.sql      | 20 +++++++++++
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e095..dae06117569 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -95,7 +95,7 @@ static void generate_mergejoin_paths(PlannerInfo *root,
 									 Path *inner_cheapest_total,
 									 List *merge_pathkeys,
 									 bool is_partial);
-
+static bool all_clauses_compatible(Relids outerrelids, List *clauses);
 
 /*
  * add_paths_to_joinrel
@@ -972,6 +972,10 @@ try_mergejoin_path(PlannerInfo *root,
 		return;
 	}
 
+	/* Sometimes can't create merjejoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses))
+		return;
+
 	/*
 	 * If the given paths are already well enough ordered, we can skip doing
 	 * an explicit sort.
@@ -1036,6 +1040,10 @@ try_partial_mergejoin_path(PlannerInfo *root,
 {
 	JoinCostWorkspace workspace;
 
+	/* Sometimes can't create merjejoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, mergeclauses))
+		return;
+
 	/*
 	 * See comments in try_partial_hashjoin_path().
 	 */
@@ -1129,6 +1137,10 @@ try_hashjoin_path(PlannerInfo *root,
 		return;
 	}
 
+	/* Sometimes can't create hashjoin plan if we have non-commutable clauses */
+	if (!all_clauses_compatible(outer_path->parent->relids, hashclauses))
+		return;
+
 	/*
 	 * See comments in try_nestloop_path().  Also note that hashjoin paths
 	 * never have any output pathkeys, per comments in create_hashjoin_path.
@@ -2431,3 +2443,35 @@ select_mergejoin_clauses(PlannerInfo *root,
 
 	return result_list;
 }
+
+/*
+ * all_clauses_compatible
+ *
+ * Determine that all clauses' operators have commutator
+ * when necessary.  While constructing merge join plan or
+ * hash join plan, optimizer can call get_switched_clauses(),
+ * which can error out if clauses are not commutable.
+ * The logic here should match one in get_switched_clauses().
+ */
+static bool
+all_clauses_compatible(Relids outer_relids, List *clauses)
+{
+	ListCell   *l;
+
+	foreach(l, clauses)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+		OpExpr	   *clause = (OpExpr *) restrictinfo->clause;
+		Oid			opoid;
+
+		if (bms_is_subset(restrictinfo->right_relids, outer_relids))
+		{
+			opoid = get_commutator(clause->opno);
+
+			if (!OidIsValid(opoid))
+				return false;
+		}
+	}
+
+	return true;
+}
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 126f7047fed..c8c85fd6c2d 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -323,6 +323,51 @@ explain (costs off)
                            Index Cond: (ff = '42'::bigint)
 (19 rows)
 
+-- merge join should check if conditions are commutable
+explain (costs off)
+with ss1 as materialized (
+	select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1
+)
+  select * from ec1,
+    (select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1) as ss2,
+  ss1
+  where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Merge Join
+   Merge Cond: ((((ec1_1.ff + 2) + 1)) = ss1.x)
+   CTE ss1
+     ->  Append
+           ->  Seq Scan on ec1 ec1_4
+           ->  Seq Scan on ec1 ec1_5
+           ->  Seq Scan on ec1 ec1_6
+   ->  Merge Append
+         Sort Key: (((ec1_1.ff + 2) + 1))
+         ->  Index Scan using ec1_expr2 on ec1 ec1_1
+         ->  Index Scan using ec1_expr3 on ec1 ec1_2
+         ->  Index Scan using ec1_expr4 on ec1 ec1_3
+   ->  Materialize
+         ->  Merge Join
+               Merge Cond: (ss1.x = ec1.f1)
+               ->  Sort
+                     Sort Key: ss1.x
+                     ->  CTE Scan on ss1
+               ->  Sort
+                     Sort Key: ec1.f1 USING <
+                     ->  Index Scan using ec1_pkey on ec1
+                           Index Cond: (ff = '42'::bigint)
+(22 rows)
+
 -- check partially indexed scan
 set enable_nestloop = on;
 set enable_mergejoin = off;
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 247b0a31055..5f74987dbdc 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -193,6 +193,26 @@ explain (costs off)
      select ff + 4 as x from ec1) as ss2
   where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
 
+-- merge join should check if conditions are commutable
+explain (costs off)
+with ss1 as materialized (
+	select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1
+)
+  select * from ec1,
+    (select ff + 1 as x from
+       (select ff + 2 as ff from ec1
+        union all
+        select ff + 3 as ff from ec1) ss0
+     union all
+     select ff + 4 as x from ec1) as ss2,
+  ss1
+  where ss1.x = ec1.f1 and ss1.x = ss2.x and ec1.ff = 42::int8;
+
 -- check partially indexed scan
 set enable_nestloop = on;
 set enable_mergejoin = off;
-- 
2.34.1

#2Richard Guo
guofenglinux@gmail.com
In reply to: Alexander Pyhalov (#1)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Mon, Jun 17, 2024 at 10:51 PM Alexander Pyhalov
<a.pyhalov@postgrespro.ru> wrote:

There's the following inconsistency between try_mergejoin_path() and
create_mergejoin_plan().
When clause operator has no commutator, we can end up with mergejoin
path.
Later create_mergejoin_plan() will call get_switched_clauses(). This
function can error out with

ERROR: could not find commutator for operator XXX

Interesting. This error can be reproduced with table 'ec1' from
sql/equivclass.sql.

set enable_indexscan to off;

explain select * from ec1 t1 join ec1 t2 on t2.ff = t1.f1;
ERROR: could not find commutator for operator 30450

The column ec1.f1 has a type of 'int8alias1', a new data type created in
this test file. Additionally, there is also a newly created operator
'int8 = int8alias1' which is mergejoinable but lacks a valid commutator.
Therefore, there is no problem generating the mergejoin path, but when
we create the mergejoin plan, get_switched_clauses would notice the
absence of a valid commutator needed to commute the clause.

It seems to me that the new operator is somewhat artificial, since it is
designed to support a mergejoin but lacks a valid commutator. So before
we proceed to discuss the fix, I'd like to know whether this is a valid
issue that needs fixing.

Any thoughts?

Thanks
Richard

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Richard Guo (#2)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

Richard Guo <guofenglinux@gmail.com> writes:

On Mon, Jun 17, 2024 at 10:51 PM Alexander Pyhalov
<a.pyhalov@postgrespro.ru> wrote:

ERROR: could not find commutator for operator XXX

It seems to me that the new operator is somewhat artificial, since it is
designed to support a mergejoin but lacks a valid commutator. So before
we proceed to discuss the fix, I'd like to know whether this is a valid
issue that needs fixing.

Well, there's no doubt that the case is artificial: nobody who knew
what they were doing would install an incomplete opclass like this
in a production setting. However, there are several parts of the
planner that take pains to avoid this type of failure. I am pretty
sure that we are careful about flipping around candidate indexscan
quals for instance. And the "broken equivalence class" mechanism
is all about that, which is what equivclass.sql is setting up this
opclass to test. So I find it a bit sad if mergejoin creation is
tripping over this case.

I do not think we should add a great deal of complexity or extra
planner cycles to deal with this; but if it can be fixed at low
cost, we should.

regards, tom lane

#4Richard Guo
guofenglinux@gmail.com
In reply to: Tom Lane (#3)
1 attachment(s)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Wed, Jun 19, 2024 at 12:49 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Richard Guo <guofenglinux@gmail.com> writes:

It seems to me that the new operator is somewhat artificial, since it is
designed to support a mergejoin but lacks a valid commutator. So before
we proceed to discuss the fix, I'd like to know whether this is a valid
issue that needs fixing.

I do not think we should add a great deal of complexity or extra
planner cycles to deal with this; but if it can be fixed at low
cost, we should.

I think we can simply verify the validity of commutators for clauses in
the form "inner op outer" when selecting mergejoin/hash clauses. If a
clause lacks a commutator, we should consider it unusable for the given
pair of outer and inner rels. Please see the attached patch.

Thanks
Richard

Attachments:

v2-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchapplication/octet-stream; name=v2-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchDownload
From 45e2aa24c3b5d550e8be6ace105f653f6d52e826 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 19 Jun 2024 19:57:40 +0900
Subject: [PATCH v2] Check the validity of commutators for merge/hash clauses

When creating merge or hash join plans in createplan.c, the merge or
hash clauses may need to get commuted to ensure that the outer var is on
the left and the inner var is on the right if they are not already in
the expected form.  This requires that their operators have commutators.
Failing to find a commutator at this stage would result in 'ERROR: could
not find commutator for operator xxx', with no opportunity to select an
alternative plan.

Typically, this is not an issue because mergejoinable or hashable
operators are expected to always have valid commutators.  But in some
artificial cases this assumption may not hold true.  So this patch adds
checks to verify the validity of commutators for clauses in the form
"inner op outer" when selecting mergejoin/hash clauses.
---
 src/backend/optimizer/path/joinpath.c | 27 ++++++++++
 src/test/regress/expected/join.out    | 71 +++++++++++++++++++++++++++
 src/test/regress/sql/join.sql         | 58 ++++++++++++++++++++++
 3 files changed, 156 insertions(+)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e09..bef1944196 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -24,6 +24,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
 /* Hook for plugins to get control in add_paths_to_joinrel() */
@@ -2131,6 +2132,17 @@ hash_inner_and_outer(PlannerInfo *root,
 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
 			continue;			/* no good for these input relations */
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because hashclauses will get
+		 * commuted if needed in createplan.c to put the outer var on the left
+		 * (see get_switched_clauses).  This probably shouldn't ever fail,
+		 * since hashable operators ought to have commutators, but be paranoid.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(((OpExpr *) restrictinfo->clause)->opno)))
+			continue;
+
 		hashclauses = lappend(hashclauses, restrictinfo);
 	}
 
@@ -2394,6 +2406,21 @@ select_mergejoin_clauses(PlannerInfo *root,
 			continue;			/* no good for these input relations */
 		}
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because mergejoin clauses will
+		 * get commuted if needed in createplan.c to put the outer var on the
+		 * left (see get_switched_clauses).  This probably shouldn't ever fail,
+		 * since mergejoinable operators ought to have commutators, but be
+		 * paranoid.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(((OpExpr *) restrictinfo->clause)->opno)))
+		{
+			have_nonmergeable_joinclause = true;
+			continue;
+		}
+
 		/*
 		 * Insist that each side have a non-redundant eclass.  This
 		 * restriction is needed because various bits of the planner expect
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 6b16c3a676..fb2c88d2a6 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -7960,3 +7960,74 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+-- Typically there are not any such operators that are mergejoinable or
+-- hashable but have no commutators; so we have to hack things up to create one.
+create type int8nocommutator;
+create function int8nocommutatorin(cstring) returns int8nocommutator
+  strict immutable language internal as 'int8in';
+NOTICE:  return type int8nocommutator is only a shell
+create function int8nocommutatorout(int8nocommutator) returns cstring
+  strict immutable language internal as 'int8out';
+NOTICE:  argument type int8nocommutator is only a shell
+create type int8nocommutator (
+    input = int8nocommutatorin,
+    output = int8nocommutatorout,
+    like = int8
+);
+create function int8nocommutatoreq(int8, int8nocommutator) returns bool
+  strict immutable language internal as 'int8eq';
+create operator = (
+    procedure = int8nocommutatoreq,
+    leftarg = int8, rightarg = int8nocommutator,
+    restrict = eqsel, join = eqjoinsel,
+    hashes, merges
+);
+alter operator family integer_ops using btree add
+  operator 3 = (int8, int8nocommutator);
+create function int8nocommutatorlt(int8nocommutator, int8nocommutator) returns bool
+  strict immutable language internal as 'int8lt';
+create operator < (
+    procedure = int8nocommutatorlt,
+    leftarg = int8nocommutator, rightarg = int8nocommutator
+);
+alter operator family integer_ops using btree add
+  operator 1 < (int8nocommutator, int8nocommutator);
+create function int8nocommutatorcmp(int8, int8nocommutator) returns int
+  strict immutable language internal as 'btint8cmp';
+alter operator family integer_ops using btree add
+  function 1 int8nocommutatorcmp (int8, int8nocommutator);
+create temp table tbl_nocom(a int8, b int8nocommutator);
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Merge Full Join
+   Merge Cond: (t2.a = t1.b)
+   ->  Sort
+         Sort Key: t2.a
+         ->  Seq Scan on tbl_nocom t2
+   ->  Sort
+         Sort Key: t1.b USING <
+         ->  Seq Scan on tbl_nocom t1
+(8 rows)
+
+-- check that non-commutable hash clauses do not lead to error
+reset enable_hashjoin;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Hash Full Join
+   Hash Cond: (t2.a = t1.b)
+   ->  Seq Scan on tbl_nocom t2
+   ->  Hash
+         ->  Seq Scan on tbl_nocom t1
+(5 rows)
+
+reset enable_mergejoin;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 8bfe3b7ba6..2a07d30a1e 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2928,3 +2928,61 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+
+-- Typically there are not any such operators that are mergejoinable or
+-- hashable but have no commutators; so we have to hack things up to create one.
+
+create type int8nocommutator;
+create function int8nocommutatorin(cstring) returns int8nocommutator
+  strict immutable language internal as 'int8in';
+create function int8nocommutatorout(int8nocommutator) returns cstring
+  strict immutable language internal as 'int8out';
+create type int8nocommutator (
+    input = int8nocommutatorin,
+    output = int8nocommutatorout,
+    like = int8
+);
+
+create function int8nocommutatoreq(int8, int8nocommutator) returns bool
+  strict immutable language internal as 'int8eq';
+create operator = (
+    procedure = int8nocommutatoreq,
+    leftarg = int8, rightarg = int8nocommutator,
+    restrict = eqsel, join = eqjoinsel,
+    hashes, merges
+);
+alter operator family integer_ops using btree add
+  operator 3 = (int8, int8nocommutator);
+
+create function int8nocommutatorlt(int8nocommutator, int8nocommutator) returns bool
+  strict immutable language internal as 'int8lt';
+create operator < (
+    procedure = int8nocommutatorlt,
+    leftarg = int8nocommutator, rightarg = int8nocommutator
+);
+alter operator family integer_ops using btree add
+  operator 1 < (int8nocommutator, int8nocommutator);
+
+create function int8nocommutatorcmp(int8, int8nocommutator) returns int
+  strict immutable language internal as 'btint8cmp';
+alter operator family integer_ops using btree add
+  function 1 int8nocommutatorcmp (int8, int8nocommutator);
+
+create temp table tbl_nocom(a int8, b int8nocommutator);
+
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+-- check that non-commutable hash clauses do not lead to error
+reset enable_hashjoin;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+reset enable_mergejoin;
-- 
2.43.0

#5Alexander Pyhalov
a.pyhalov@postgrespro.ru
In reply to: Richard Guo (#4)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

Richard Guo писал(а) 2024-06-19 16:30:

On Wed, Jun 19, 2024 at 12:49 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Richard Guo <guofenglinux@gmail.com> writes:

It seems to me that the new operator is somewhat artificial, since it is
designed to support a mergejoin but lacks a valid commutator. So before
we proceed to discuss the fix, I'd like to know whether this is a valid
issue that needs fixing.

I do not think we should add a great deal of complexity or extra
planner cycles to deal with this; but if it can be fixed at low
cost, we should.

I think we can simply verify the validity of commutators for clauses in
the form "inner op outer" when selecting mergejoin/hash clauses. If a
clause lacks a commutator, we should consider it unusable for the given
pair of outer and inner rels. Please see the attached patch.

This seems to be working for my test cases.
--
Best regards,
Alexander Pyhalov,
Postgres Professional

#6Richard Guo
guofenglinux@gmail.com
In reply to: Alexander Pyhalov (#5)
1 attachment(s)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Wed, Jun 19, 2024 at 10:24 PM Alexander Pyhalov
<a.pyhalov@postgrespro.ru> wrote:

Richard Guo писал(а) 2024-06-19 16:30:

I think we can simply verify the validity of commutators for clauses in
the form "inner op outer" when selecting mergejoin/hash clauses. If a
clause lacks a commutator, we should consider it unusable for the given
pair of outer and inner rels. Please see the attached patch.

This seems to be working for my test cases.

Thank you for confirming. Here is an updated patch with some tweaks to
the comments and commit message. I've parked this patch in the July
commitfest.

Thanks
Richard

Attachments:

v3-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchapplication/octet-stream; name=v3-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchDownload
From 9ccf0fd191fadee911e470c277d16fd470fe225b Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 19 Jun 2024 19:57:40 +0900
Subject: [PATCH v3] Check the validity of commutators for merge/hash clauses

When creating merge or hash join plans in createplan.c, the merge or
hash clauses may need to get commuted to ensure that the outer var is on
the left and the inner var is on the right if they are not already in
the expected form.  This requires that their operators have commutators.
Failing to find a commutator at this stage would result in 'ERROR: could
not find commutator for operator xxx', with no opportunity to select an
alternative plan.

Typically, this is not an issue because mergejoinable or hashable
operators are expected to always have valid commutators.  But in some
artificial cases this assumption may not hold true.  Therefore, here in
this patch we check the validity of commutators for clauses in the form
"inner op outer" when selecting mergejoin/hash clauses, and consider a
clause unusable for the current pair of outer and inner relations if it
lacks a commutator.
---
 src/backend/optimizer/path/joinpath.c | 27 ++++++++++
 src/test/regress/expected/join.out    | 72 +++++++++++++++++++++++++++
 src/test/regress/sql/join.sql         | 59 ++++++++++++++++++++++
 3 files changed, 158 insertions(+)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 5be8da9e09..5c8f436367 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -24,6 +24,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
 /* Hook for plugins to get control in add_paths_to_joinrel() */
@@ -2131,6 +2132,17 @@ hash_inner_and_outer(PlannerInfo *root,
 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
 			continue;			/* no good for these input relations */
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because hashclauses in this
+		 * form will get commuted in createplan.c to put the outer var on the
+		 * left (see get_switched_clauses).  This probably shouldn't ever fail,
+		 * since hashable operators ought to have commutators, but be paranoid.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(((OpExpr *) restrictinfo->clause)->opno)))
+			continue;
+
 		hashclauses = lappend(hashclauses, restrictinfo);
 	}
 
@@ -2394,6 +2406,21 @@ select_mergejoin_clauses(PlannerInfo *root,
 			continue;			/* no good for these input relations */
 		}
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because mergejoin clauses in
+		 * this form will get commuted in createplan.c to put the outer var on
+		 * the left (see get_switched_clauses).  This probably shouldn't ever
+		 * fail, since mergejoinable operators ought to have commutators, but
+		 * be paranoid.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(((OpExpr *) restrictinfo->clause)->opno)))
+		{
+			have_nonmergeable_joinclause = true;
+			continue;
+		}
+
 		/*
 		 * Insist that each side have a non-redundant eclass.  This
 		 * restriction is needed because various bits of the planner expect
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 6b16c3a676..77d1df17d0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -7960,3 +7960,75 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we have to
+-- hack things up to create one.
+create type int8nocommutator;
+create function int8nocommutatorin(cstring) returns int8nocommutator
+  strict immutable language internal as 'int8in';
+NOTICE:  return type int8nocommutator is only a shell
+create function int8nocommutatorout(int8nocommutator) returns cstring
+  strict immutable language internal as 'int8out';
+NOTICE:  argument type int8nocommutator is only a shell
+create type int8nocommutator (
+    input = int8nocommutatorin,
+    output = int8nocommutatorout,
+    like = int8
+);
+create function int8nocommutatoreq(int8, int8nocommutator) returns bool
+  strict immutable language internal as 'int8eq';
+create operator = (
+    procedure = int8nocommutatoreq,
+    leftarg = int8, rightarg = int8nocommutator,
+    restrict = eqsel, join = eqjoinsel,
+    hashes, merges
+);
+alter operator family integer_ops using btree add
+  operator 3 = (int8, int8nocommutator);
+create function int8nocommutatorlt(int8nocommutator, int8nocommutator) returns bool
+  strict immutable language internal as 'int8lt';
+create operator < (
+    procedure = int8nocommutatorlt,
+    leftarg = int8nocommutator, rightarg = int8nocommutator
+);
+alter operator family integer_ops using btree add
+  operator 1 < (int8nocommutator, int8nocommutator);
+create function int8nocommutatorcmp(int8, int8nocommutator) returns int
+  strict immutable language internal as 'btint8cmp';
+alter operator family integer_ops using btree add
+  function 1 int8nocommutatorcmp (int8, int8nocommutator);
+create temp table tbl_nocom(a int8, b int8nocommutator);
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Merge Full Join
+   Merge Cond: (t2.a = t1.b)
+   ->  Sort
+         Sort Key: t2.a
+         ->  Seq Scan on tbl_nocom t2
+   ->  Sort
+         Sort Key: t1.b USING <
+         ->  Seq Scan on tbl_nocom t1
+(8 rows)
+
+-- check that non-commutable hash clauses do not lead to error
+reset enable_hashjoin;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Hash Full Join
+   Hash Cond: (t2.a = t1.b)
+   ->  Seq Scan on tbl_nocom t2
+   ->  Hash
+         ->  Seq Scan on tbl_nocom t1
+(5 rows)
+
+reset enable_mergejoin;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 8bfe3b7ba6..9405654d54 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2928,3 +2928,62 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
 
 RESET enable_indexonlyscan;
 RESET enable_seqscan;
+
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we have to
+-- hack things up to create one.
+
+create type int8nocommutator;
+create function int8nocommutatorin(cstring) returns int8nocommutator
+  strict immutable language internal as 'int8in';
+create function int8nocommutatorout(int8nocommutator) returns cstring
+  strict immutable language internal as 'int8out';
+create type int8nocommutator (
+    input = int8nocommutatorin,
+    output = int8nocommutatorout,
+    like = int8
+);
+
+create function int8nocommutatoreq(int8, int8nocommutator) returns bool
+  strict immutable language internal as 'int8eq';
+create operator = (
+    procedure = int8nocommutatoreq,
+    leftarg = int8, rightarg = int8nocommutator,
+    restrict = eqsel, join = eqjoinsel,
+    hashes, merges
+);
+alter operator family integer_ops using btree add
+  operator 3 = (int8, int8nocommutator);
+
+create function int8nocommutatorlt(int8nocommutator, int8nocommutator) returns bool
+  strict immutable language internal as 'int8lt';
+create operator < (
+    procedure = int8nocommutatorlt,
+    leftarg = int8nocommutator, rightarg = int8nocommutator
+);
+alter operator family integer_ops using btree add
+  operator 1 < (int8nocommutator, int8nocommutator);
+
+create function int8nocommutatorcmp(int8, int8nocommutator) returns int
+  strict immutable language internal as 'btint8cmp';
+alter operator family integer_ops using btree add
+  function 1 int8nocommutatorcmp (int8, int8nocommutator);
+
+create temp table tbl_nocom(a int8, b int8nocommutator);
+
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+-- check that non-commutable hash clauses do not lead to error
+reset enable_hashjoin;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+reset enable_mergejoin;
-- 
2.43.0

#7Tom Lane
tgl@sss.pgh.pa.us
In reply to: Richard Guo (#6)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

Richard Guo <guofenglinux@gmail.com> writes:

Thank you for confirming. Here is an updated patch with some tweaks to
the comments and commit message. I've parked this patch in the July
commitfest.

I took a brief look at this. I think the basic idea is sound,
but I have a couple of nits:

* It's not entirely obvious that the checks preceding these additions
are sufficient to ensure that the clauses are OpExprs. They probably
are, since the clauses are marked hashable or mergeable, but that test
is mighty far away. More to the point, if they ever weren't OpExprs
the result would likely be to pass a bogus OID to get_commutator and
thus silently fail, allowing the problem to go undetected for a long
time. I'd suggest using castNode() rather than a hard-wired
assumption that the clause is an OpExpr.

* Do we really need to construct a whole new set of bogus operators
and opclasses to test this? As you noted, the regression tests
already set up an incomplete opclass for other purposes. Why can't
we extend that test, to reduce the amount of cycles wasted forevermore
on this rather trivial point?

(I'm actually wondering whether we really need to memorialize this
with a regression test case at all. But I'd settle for minimizing
the amount of test cycles added.)

regards, tom lane

#8Richard Guo
guofenglinux@gmail.com
In reply to: Tom Lane (#7)
1 attachment(s)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Thu, Jul 25, 2024 at 12:07 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

I took a brief look at this. I think the basic idea is sound,
but I have a couple of nits:

Thank you for reviewing this patch!

* It's not entirely obvious that the checks preceding these additions
are sufficient to ensure that the clauses are OpExprs. They probably
are, since the clauses are marked hashable or mergeable, but that test
is mighty far away. More to the point, if they ever weren't OpExprs
the result would likely be to pass a bogus OID to get_commutator and
thus silently fail, allowing the problem to go undetected for a long
time. I'd suggest using castNode() rather than a hard-wired
assumption that the clause is an OpExpr.

Good point. I've modified the code to use castNode(), and added
comment accordingly.

* Do we really need to construct a whole new set of bogus operators
and opclasses to test this? As you noted, the regression tests
already set up an incomplete opclass for other purposes. Why can't
we extend that test, to reduce the amount of cycles wasted forevermore
on this rather trivial point?

(I'm actually wondering whether we really need to memorialize this
with a regression test case at all. But I'd settle for minimizing
the amount of test cycles added.)

At first I planned to use the alias type 'int8alias1' created in
equivclass.sql for this test. However, I found that this type is not
created yet when running join.sql. Perhaps it's more convenient to
place this test in equivclass.sql to leverage the bogus operators
created there, but the test does not seem to be related to 'broken'
ECs. I'm not sure if equivclass.sql is the appropriate place.

Do you think it works if we place this test in equivclass.sql and
write a comment explaining why it's there, like attached? Now I’m
also starting to wonder if this change actually warrants such a test.

BTW, do you think this patch is worth backpatching?

Thanks
Richard

Attachments:

v4-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchapplication/octet-stream; name=v4-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchDownload
From 53a39d91f08d169a328d67c906e70f62ff156407 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 19 Jun 2024 19:57:40 +0900
Subject: [PATCH v4] Check the validity of commutators for merge/hash clauses

When creating merge or hash join plans in createplan.c, the merge or
hash clauses may need to get commuted to ensure that the outer var is on
the left and the inner var is on the right if they are not already in
the expected form.  This requires that their operators have commutators.
Failing to find a commutator at this stage would result in 'ERROR: could
not find commutator for operator xxx', with no opportunity to select an
alternative plan.

Typically, this is not an issue because mergejoinable or hashable
operators are expected to always have valid commutators.  But in some
artificial cases this assumption may not hold true.  Therefore, here in
this patch we check the validity of commutators for clauses in the form
"inner op outer" when selecting mergejoin/hash clauses, and consider a
clause unusable for the current pair of outer and inner relations if it
lacks a commutator.
---
 src/backend/optimizer/path/joinpath.c    | 32 ++++++++++++++++++
 src/test/regress/expected/equivclass.out | 42 ++++++++++++++++++++++++
 src/test/regress/sql/equivclass.sql      | 28 ++++++++++++++++
 3 files changed, 102 insertions(+)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7a2c20b145..72bdf2f50a 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -25,6 +25,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
+#include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
 /* Hook for plugins to get control in add_paths_to_joinrel() */
@@ -2267,6 +2268,20 @@ hash_inner_and_outer(PlannerInfo *root,
 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
 			continue;			/* no good for these input relations */
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because hashclauses in this
+		 * form will get commuted in createplan.c to put the outer var on the
+		 * left (see get_switched_clauses).  This probably shouldn't ever
+		 * fail, since hashable operators ought to have commutators, but be
+		 * paranoid.
+		 *
+		 * The clause being hashjoinable indicates that it's an OpExpr.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
+			continue;
+
 		hashclauses = lappend(hashclauses, restrictinfo);
 	}
 
@@ -2541,6 +2556,23 @@ select_mergejoin_clauses(PlannerInfo *root,
 			continue;			/* no good for these input relations */
 		}
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because mergejoin clauses in
+		 * this form will get commuted in createplan.c to put the outer var on
+		 * the left (see get_switched_clauses).  This probably shouldn't ever
+		 * fail, since mergejoinable operators ought to have commutators, but
+		 * be paranoid.
+		 *
+		 * The clause being mergejoinable indicates that it's an OpExpr.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
+		{
+			have_nonmergeable_joinclause = true;
+			continue;
+		}
+
 		/*
 		 * Insist that each side have a non-redundant eclass.  This
 		 * restriction is needed because various bits of the planner expect
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 126f7047fe..5635ef2141 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -451,3 +451,45 @@ explain (costs off)  -- this should not require a sort
    Filter: (f1 = 'foo'::name)
 (2 rows)
 
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we leverage
+-- the alias type 'int8alias1' created in this file to conduct the tests.
+-- That's why this test is included here rather than in join.sql.
+begin;
+create table tbl_nocom(a int8, b int8alias1);
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+set enable_mergejoin to on;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Merge Full Join
+   Merge Cond: (t2.a = t1.b)
+   ->  Sort
+         Sort Key: t2.a
+         ->  Seq Scan on tbl_nocom t2
+   ->  Sort
+         Sort Key: t1.b USING <
+         ->  Seq Scan on tbl_nocom t1
+(8 rows)
+
+-- check that non-commutable hash clauses do not lead to error
+alter operator = (int8, int8alias1) set (hashes);
+set enable_hashjoin to on;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Hash Full Join
+   Hash Cond: (t2.a = t1.b)
+   ->  Seq Scan on tbl_nocom t2
+   ->  Hash
+         ->  Seq Scan on tbl_nocom t1
+(5 rows)
+
+abort;
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 247b0a3105..87daf6c5f7 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -269,3 +269,31 @@ create temp view overview as
   select f1::information_schema.sql_identifier as sqli, f2 from undername;
 explain (costs off)  -- this should not require a sort
   select * from overview where sqli = 'foo' order by sqli;
+
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we leverage
+-- the alias type 'int8alias1' created in this file to conduct the tests.
+-- That's why this test is included here rather than in join.sql.
+
+begin;
+
+create table tbl_nocom(a int8, b int8alias1);
+
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+set enable_mergejoin to on;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+-- check that non-commutable hash clauses do not lead to error
+alter operator = (int8, int8alias1) set (hashes);
+set enable_hashjoin to on;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+abort;
-- 
2.43.0

#9Richard Guo
guofenglinux@gmail.com
In reply to: Richard Guo (#8)
1 attachment(s)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Fri, Jul 26, 2024 at 3:56 PM Richard Guo <guofenglinux@gmail.com> wrote:

Do you think it works if we place this test in equivclass.sql and
write a comment explaining why it's there, like attached? Now I’m
also starting to wonder if this change actually warrants such a test.

The new test case fails starting from adf97c156, and we have to
install a hash opfamily and a hash function for the hacked int8alias1
type to make the test case work again.

Now, I'm more dubious about whether we really need to add a test case
for this change.

Thanks
Richard

Attachments:

v5-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchapplication/octet-stream; name=v5-0001-Check-the-validity-of-commutators-for-merge-hash-clauses.patchDownload
From bc3b2a22b8397a445d3407cf3129c5980d175d9e Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 19 Jun 2024 19:57:40 +0900
Subject: [PATCH v5] Check the validity of commutators for merge/hash clauses

When creating merge or hash join plans in createplan.c, the merge or
hash clauses may need to get commuted to ensure that the outer var is on
the left and the inner var is on the right if they are not already in
the expected form.  This requires that their operators have commutators.
Failing to find a commutator at this stage would result in 'ERROR: could
not find commutator for operator xxx', with no opportunity to select an
alternative plan.

Typically, this is not an issue because mergejoinable or hashable
operators are expected to always have valid commutators.  But in some
artificial cases this assumption may not hold true.  Therefore, here in
this patch we check the validity of commutators for clauses in the form
"inner op outer" when selecting mergejoin/hash clauses, and consider a
clause unusable for the current pair of outer and inner relations if it
lacks a commutator.
---
 src/backend/optimizer/path/joinpath.c    | 32 ++++++++++++++++
 src/test/regress/expected/equivclass.out | 48 ++++++++++++++++++++++++
 src/test/regress/sql/equivclass.sql      | 34 +++++++++++++++++
 3 files changed, 114 insertions(+)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index b0e8c94dfc..a244300463 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -25,6 +25,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
+#include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
 /* Hook for plugins to get control in add_paths_to_joinrel() */
@@ -2266,6 +2267,20 @@ hash_inner_and_outer(PlannerInfo *root,
 		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
 			continue;			/* no good for these input relations */
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because hashclauses in this
+		 * form will get commuted in createplan.c to put the outer var on the
+		 * left (see get_switched_clauses).  This probably shouldn't ever
+		 * fail, since hashable operators ought to have commutators, but be
+		 * paranoid.
+		 *
+		 * The clause being hashjoinable indicates that it's an OpExpr.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
+			continue;
+
 		hashclauses = lappend(hashclauses, restrictinfo);
 	}
 
@@ -2540,6 +2555,23 @@ select_mergejoin_clauses(PlannerInfo *root,
 			continue;			/* no good for these input relations */
 		}
 
+		/*
+		 * If clause has the form "inner op outer", check if its operator has
+		 * valid commutator.  This is necessary because mergejoin clauses in
+		 * this form will get commuted in createplan.c to put the outer var on
+		 * the left (see get_switched_clauses).  This probably shouldn't ever
+		 * fail, since mergejoinable operators ought to have commutators, but
+		 * be paranoid.
+		 *
+		 * The clause being mergejoinable indicates that it's an OpExpr.
+		 */
+		if (!restrictinfo->outer_is_left &&
+			!OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
+		{
+			have_nonmergeable_joinclause = true;
+			continue;
+		}
+
 		/*
 		 * Insist that each side have a non-redundant eclass.  This
 		 * restriction is needed because various bits of the planner expect
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 126f7047fe..a328164fe0 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -451,3 +451,51 @@ explain (costs off)  -- this should not require a sort
    Filter: (f1 = 'foo'::name)
 (2 rows)
 
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we leverage
+-- the alias type 'int8alias1' created in this file to conduct the tests.
+-- That's why this test is included here rather than in join.sql.
+begin;
+create table tbl_nocom(a int8, b int8alias1);
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+set enable_mergejoin to on;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Merge Full Join
+   Merge Cond: (t2.a = t1.b)
+   ->  Sort
+         Sort Key: t2.a
+         ->  Seq Scan on tbl_nocom t2
+   ->  Sort
+         Sort Key: t1.b USING <
+         ->  Seq Scan on tbl_nocom t1
+(8 rows)
+
+-- check that non-commutable hash clauses do not lead to error
+alter operator = (int8, int8alias1) set (hashes);
+alter operator family integer_ops using hash add
+  operator 1 = (int8, int8alias1);
+create function hashint8alias1(int8alias1) returns int
+  strict immutable language internal as 'hashint8';
+alter operator family integer_ops using hash add
+  function 1 hashint8alias1(int8alias1);
+set enable_hashjoin to on;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+              QUERY PLAN              
+--------------------------------------
+ Hash Full Join
+   Hash Cond: (t2.a = t1.b)
+   ->  Seq Scan on tbl_nocom t2
+   ->  Hash
+         ->  Seq Scan on tbl_nocom t1
+(5 rows)
+
+abort;
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 247b0a3105..28ed7910d0 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -269,3 +269,37 @@ create temp view overview as
   select f1::information_schema.sql_identifier as sqli, f2 from undername;
 explain (costs off)  -- this should not require a sort
   select * from overview where sqli = 'foo' order by sqli;
+
+--
+-- test handling of merge/hash clauses that do not have valid commutators
+--
+
+-- There are not (and should not be) any such operators built into Postgres
+-- that are mergejoinable or hashable but have no commutators; so we leverage
+-- the alias type 'int8alias1' created in this file to conduct the tests.
+-- That's why this test is included here rather than in join.sql.
+
+begin;
+
+create table tbl_nocom(a int8, b int8alias1);
+
+-- check that non-commutable merge clauses do not lead to error
+set enable_hashjoin to off;
+set enable_mergejoin to on;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+-- check that non-commutable hash clauses do not lead to error
+alter operator = (int8, int8alias1) set (hashes);
+alter operator family integer_ops using hash add
+  operator 1 = (int8, int8alias1);
+create function hashint8alias1(int8alias1) returns int
+  strict immutable language internal as 'hashint8';
+alter operator family integer_ops using hash add
+  function 1 hashint8alias1(int8alias1);
+set enable_hashjoin to on;
+set enable_mergejoin to off;
+explain (costs off)
+select * from tbl_nocom t1 full join tbl_nocom t2 on t2.a = t1.b;
+
+abort;
-- 
2.43.0

#10Richard Guo
guofenglinux@gmail.com
In reply to: Richard Guo (#9)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

On Tue, Sep 3, 2024 at 5:51 PM Richard Guo <guofenglinux@gmail.com> wrote:

The new test case fails starting from adf97c156, and we have to
install a hash opfamily and a hash function for the hacked int8alias1
type to make the test case work again.

Now, I'm more dubious about whether we really need to add a test case
for this change.

I pushed this patch with the test case remaining, as it adds only a
minimal number of test cycles. I explained in the commit message why
the test case is included in equivclass.sql rather than in join.sql.

I did not do backpatch because this bug cannot be reproduced without
installing an incomplete opclass, which is unlikely to happen in
practice.

Thanks for the report and review.

Thanks
Richard

#11Alexander Lakhin
exclusion@gmail.com
In reply to: Richard Guo (#10)
Re: Inconsistency between try_mergejoin_path and create_mergejoin_plan

Hello Richard and Tom,

04.09.2024 06:50, Richard Guo wrote:

I pushed this patch with the test case remaining, as it adds only a
minimal number of test cycles. I explained in the commit message why
the test case is included in equivclass.sql rather than in join.sql.

While playing with the equivclass test, I've discovered that the next step
to define a complete set of operators in the test:
@@ -65,6 +65,7 @@
     procedure = int8alias1eq,
     leftarg = int8, rightarg = int8alias1,
     restrict = eqsel, join = eqjoinsel,
+commutator = =,
     merges
 );

produces an internal error:
ERROR:  XX000: operator 32312 is not a member of opfamily 1976
LOCATION:  get_op_opfamily_properties, lsyscache.c:149

pg_regress/equivclass BACKTRACE:
get_op_opfamily_properties at lsyscache.c:149:3
MJExamineQuals at nodeMergejoin.c:228:19
ExecInitMergeJoin at nodeMergejoin.c:1608:25
ExecInitNode at execProcnode.c:303:27
InitPlan at execMain.c:964:14

Maybe the error itself is not that unexpected, but I'm confused by a
comment above the function:
 * Caller should already have verified that opno is a member of opfamily,
 * therefore we raise an error if the tuple is not found.
 */
void
get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op,
                           int *strategy,
                           Oid *lefttype,
                           Oid *righttype)
{
    HeapTuple   tp;
    Form_pg_amop amop_tup;

    tp = SearchSysCache3(AMOPOPID,
                         ObjectIdGetDatum(opno),
                         CharGetDatum(ordering_op ? AMOP_ORDER : AMOP_SEARCH),
                         ObjectIdGetDatum(opfamily));
    if (!HeapTupleIsValid(tp))
        elog(ERROR, "operator %u is not a member of opfamily %u",
             opno, opfamily);

This behavior reproduced on commit a33cf1041, dated 2007-01-23, which
added "ALTER OPERATOR FAMILY", but I think it also can be reproduced on
previous commits with manual catalog editing. (The comment was added by
a78fcfb51 from 2006-12-23, which introduced operator families.)

Best regards,
Alexander