Exists pull-up application with JoinExpr

Started by Alena Rybakina, about 1 year ago, 16 messages

a.rybakina@postgrespro.ru

about 1 year ago

1 attachment(s)

Hi, hackers!

I found a case where the EXISTS pull-up works if the inner join condition
is written in the WHERE clause:

create temp table ta (id int primary key, val int);
insert into ta values(1,1);
insert into ta values(2,2);
insert into ta values(3,3);

create temp table tb (id int primary key, aval int);
insert into tb values(4,1);
insert into tb values(5,1);
insert into tb values(1,2);

create temp table tc (id int primary key, aid int);
insert into tc values(6,1);
insert into tc values(7,2);

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb, tc WHERE ta.id = tb.id);
QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
Buffers: local hit=6
-> Seq Scan on ta (actual rows=3 loops=1)
Buffers: local hit=1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=5
-> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
Index Cond: (id = ta.id)
Heap Fetches: 1
Buffers: local hit=4
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
Planning:
Buffers: shared hit=67 read=12
(14 rows)

but it doesn't work if the same condition is written in the JOIN ... ON clause:

alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb JOIN tc ON ta.id = tb.id);
QUERY PLAN
------------------------------------------------------
Seq Scan on ta (actual rows=1 loops=1)
Filter: EXISTS(SubPlan 1)
Rows Removed by Filter: 2
Buffers: local hit=5
SubPlan 1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=4
-> Seq Scan on tb (actual rows=0 loops=3)
Filter: (ta.id = id)
Rows Removed by Filter: 3
Buffers: local hit=3
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
Planning:
Buffers: shared hit=16 read=9
(15 rows)

I have written a patch to add this functionality, and now it gives the
following query plan:

alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT *
   FROM ta
WHERE EXISTS (SELECT *
                  FROM tb JOIN tc
                  ON ta.id = tb.id);
                     QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
   Buffers: local hit=6
   -> Seq Scan on ta (actual rows=3 loops=1)
         Buffers: local hit=1
   -> Nested Loop (actual rows=0 loops=3)
         Buffers: local hit=5
         -> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
               Index Cond: (id = ta.id)
               Heap Fetches: 1
               Buffers: local hit=4
         -> Seq Scan on tc (actual rows=1 loops=1)
               Buffers: local hit=1
(12 rows)

tb and tc form a Cartesian product, which is then filtered by the
condition that correlates it with rows of ta (ta.id = tb.id). According
to this condition, only the row with id = 1 matches, so only it gets into
the result; inside the subquery that match appears twice, because of the
Cartesian product of tb with tc.

*How it works:*

I rewrote the code a bit so that it considers not only the quals in
jointree->quals, but also those in join expression
(subselect->jointree->fromlist). If they satisfy the conditions for
using pull up, I add them to the list of clauses and form a "Bool"
expression from them, joined by an "AND" operation.

Regards, Alena Rybakina Postgres Professional

Attachments:

0001-Add-EXISTS-pull-up-if-subquery-join-expressions.patch — text/x-patch; charset=UTF-8; name=0001-Add-EXISTS-pull-up-if-subquery-join-expressions.patch — Download

From 3b3d761cd4d67e299cdbd3c2e6cf5256f27da5eb Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Tue, 24 Dec 2024 07:29:42 +0300
Subject: [PATCH] Add EXISTS pull up if subquery join expressions  are
 independent and have a reference only to the outer part of the subquery  or
 they are constant. We should transform expression onto subselect query  with
 using on the referenced table. We need only to know that the table is  not
 empty.

This query:
SELECT *
   FROM ta
  WHERE EXISTS (SELECT *
                  FROM tb
                   left outer JOIN tc
                    ON ta.id = tb.id);
can be transformed to:
select * from ta where exists (select * from tb limit 1);
---
 src/backend/optimizer/plan/subselect.c  |  94 ++++++++++++++-
 src/test/regress/expected/subselect.out | 151 ++++++++++++++++++++++++
 src/test/regress/sql/subselect.sql      |  65 ++++++++++
 3 files changed, 305 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index ed62e3a0fcf..9b2b6addfaf 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1376,6 +1376,18 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	ListCell *lc;
+	List *clauses = NIL;
+	List *all_clauses = NIL;
+	int first_elem = true;
+	Const *const_var = makeConst(BOOLOID,
+									-1,
+									InvalidOid,
+									sizeof(bool),
+									(Datum) 1,
+									false,
+									true);
+
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1403,14 +1415,86 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * with noplace to evaluate the targetlist.
 	 */
 	if (!simplify_EXISTS_query(root, subselect))
-		return NULL;
+	{
+			return NULL;
+	}
+
+	if (subselect->jointree->quals)
+		all_clauses = lappend(all_clauses, subselect->jointree->quals);
+
+	subselect->jointree->quals = NULL;
+
+	/* Gather all clauses in main list for the further consideration */
+	all_clauses = list_concat(all_clauses, subselect->jointree->fromlist);
 
 	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
+	 * We will be able to remove top-level plain JOIN/ON clauses if they are not outer joins.
 	 */
-	whereClause = subselect->jointree->quals;
+	foreach (lc, all_clauses)
+	{
+		Node *je = ((Node *) lfirst(lc));
+
+		whereClause = je;
+
+		if (IsA(je, RangeTblRef))
+		{
+			goto end;
+		}
+
+		if ((IsA(je, JoinExpr) && ((JoinExpr *)je)->jointype != JOIN_INNER))
+		{
+			goto end;
+		}
+
+		if (IsA(je, JoinExpr) && ((JoinExpr *)je)->quals != NULL)
+			whereClause = ((JoinExpr *)je)->quals;
+
+		/*
+		* On the other hand, the WHERE clause must contain some Vars of the
+		* parent query, else it's not gonna be a join.
+		*/
+		if (!contain_vars_of_level(whereClause, 1))
+		{
+			goto end;
+		}
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+		{
+			goto end;
+		}
+
+		/*
+		 * In case of a successful attempt, replaces it with the correct condition
+		 */
+		if (IsA(je, JoinExpr))
+			((JoinExpr *)je)->quals = (Node *) const_var;
+
+		clauses = lappend(clauses, whereClause);
+
+		first_elem = false;
+		subselect->jointree->fromlist = list_delete_ptr(subselect->jointree->fromlist, lc);
+
+		end:
+			if (first_elem)
+				return NULL;
+			continue;
+	}
+
+	list_free(all_clauses);
+
+	/* We don't have any clauses for pull-up creation */
+	if (clauses == NIL)
+		return NULL;
+	else
+		/* We can easily combine clauses through AND operator because they are independent */
+		whereClause = list_length(clauses) > 1 ?
+							(Node *) makeBoolExpr(AND_EXPR, clauses, -1) :
+							(Node *) linitial(clauses);
+
+
 	subselect->jointree->quals = NULL;
 
 	/*
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ebc545e2461..6806fa9bb06 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -812,6 +812,157 @@ where exists (
       from text_tbl ) ss
   where road.name = ss.f1 );
 rollback;
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tb (actual rows=1 loops=1)
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tc (actual rows=1 loops=1)
+(9 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Seq Scan on ta (actual rows=2 loops=1)
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join (actual rows=1 loops=2)
+           Join Filter: (ta.id = tc.id)
+           Rows Removed by Join Filter: 2
+           ->  Seq Scan on tc (actual rows=1 loops=2)
+           ->  Materialize (actual rows=2 loops=2)
+                 Storage: Memory  Maximum Storage: 17kB
+                 ->  Seq Scan on tb (actual rows=4 loops=1)
+(10 rows)
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 6ed3636a9e4..f73eb65aaa0 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -439,6 +439,71 @@ where exists (
 
 rollback;
 
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
-- 
2.34.1

Ranier Vilela

ranier.vf@gmail.com

about 1 year ago

In reply to: Alena Rybakina (#1)

1 attachment(s)

Re: Exists pull-up application with JoinExpr

Hi Alena.

Em ter., 24 de dez. de 2024 às 01:44, Alena Rybakina <
a.rybakina@postgrespro.ru> escreveu:

Hi, hackers!

I found one pull-up that works if the inner join condition is written
through the where condition,

create temp table ta (id int primary key, val int);
insert into ta values(1,1);
insert into ta values(2,2);insert into ta values(3,3);
create temp table tb (id int primary key, aval int);
insert into tb values(4,1);
insert into tb values(5,1);
insert into tb values(1,2);

create temp table tc (id int primary key, aid int);
insert into tc values(6,1);
insert into tc values(7,2);

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT *
FROM ta
WHERE EXISTS (SELECT *
FROM tb, tc
WHERE ta.id = tb.id);
QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
Buffers: local hit=6
-> Seq Scan on ta (actual rows=3 loops=1)
Buffers: local hit=1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=5
-> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
Index Cond: (id = ta.id)
Heap Fetches: 1
Buffers: local hit=4
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
Planning:
Buffers: shared hit=67 read=12
(14 rows)

but it doesn't work if it is written through the outside condition.

alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT *
FROM ta
WHERE EXISTS (SELECT *
FROM tb JOIN tc
ON ta.id = tb.id);
QUERY PLAN
------------------------------------------------------
Seq Scan on ta (actual rows=1 loops=1)
Filter: EXISTS(SubPlan 1)
Rows Removed by Filter: 2
Buffers: local hit=5
SubPlan 1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=4
-> Seq Scan on tb (actual rows=0 loops=3)
Filter: (ta.id = id)
Rows Removed by Filter: 3
Buffers: local hit=3
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
Planning:
Buffers: shared hit=16 read=9
(15 rows)

I have written a patch to add this functionality and now it gives an query
plan:

alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT *
FROM ta
WHERE EXISTS (SELECT *
FROM tb JOIN tc
ON ta.id = tb.id);
QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
Buffers: local hit=6
-> Seq Scan on ta (actual rows=3 loops=1)
Buffers: local hit=1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=5
-> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
Index Cond: (id = ta.id)
Heap Fetches: 1
Buffers: local hit=4
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
(12 rows)

tb and tc form a Cartesian product, but in the case of the intersection
condition with tuples from the table ta (ta.id = tb.id). So, according to
the join condition, tb intersects only with 1, and only it gets into the
result, but at the same time they appear twice - this is because of the
Cartesian product of tb with tc
*How it works:*

I rewrote the code a bit so that it considers not only the quals in
jointree->quals, but also those in join expression
(subselect->jointree->fromlist). If they satisfy the conditions for using
pull up, I add them to the list of clauses and form a "Bool" expression
from them, joined by an "AND" operation.

I took a look at this patch and I did a little polishing on it.

I also believe the tests need to specify BUFFERS OFF, because of the
recent change that made EXPLAIN ANALYZE report buffer usage by default.

The tests are failing, like this:
QUERY PLAN
 -------------------------------------------------------------------------
 Nested Loop Semi Join (actual rows=2 loops=1)
+ Buffers: local hit=7
 -> Seq Scan on ta (actual rows=2 loops=1)
+ Buffers: local hit=1
 -> Nested Loop (actual rows=1 loops=2)
+ Buffers: local hit=6
 -> Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
 Index Cond: (id = ta.id)
 Heap Fetches: 2
+ Buffers: local hit=4
 -> Seq Scan on tc (actual rows=1 loops=2)
-(7 rows)
+ Buffers: local hit=2
+(12 rows)

best regards,
Ranier Vilela

Attachments:

v1-0001-Add-EXISTS-pull-up-if-subquery-join-expressions.patch — application/octet-stream; name=v1-0001-Add-EXISTS-pull-up-if-subquery-join-expressions.patch — Download

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index ed62e3a0fc..f5042b6952 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1376,6 +1376,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	ListCell	*lc;
+	List		*clauses;
+	List		*all_clauses = NIL;
+	Const		*const_var;
+	bool		first_elem;
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1405,12 +1410,80 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
+	if (subselect->jointree->quals)
+		all_clauses = lappend(all_clauses, subselect->jointree->quals);
+
+	subselect->jointree->quals = NULL;
+
+	/* Gather all clauses in main list for the further consideration */
+	all_clauses = list_concat(all_clauses, subselect->jointree->fromlist);
+
 	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
+	 * We will be able to remove top-level plain JOIN/ON clauses if they are not outer joins.
 	 */
-	whereClause = subselect->jointree->quals;
+	clauses = NIL;
+	first_elem = true;
+	const_var = makeConst(BOOLOID,
+								-1,
+								InvalidOid,
+								sizeof(bool),
+								(Datum) 1,
+								false,
+								true);
+	foreach (lc, all_clauses)
+	{
+		Node *je = ((Node *) lfirst(lc));
+
+		whereClause = je;
+		if (IsA(je, RangeTblRef))
+			goto end;
+
+		if ((IsA(je, JoinExpr) && ((JoinExpr *)je)->jointype != JOIN_INNER))
+			goto end;
+
+		if (IsA(je, JoinExpr) && ((JoinExpr *)je)->quals != NULL)
+			whereClause = ((JoinExpr *)je)->quals;
+
+		/*
+		 * On the other hand, the WHERE clause must contain some Vars of the
+		 * parent query, else it's not gonna be a join.
+		 */
+		if (!contain_vars_of_level(whereClause, 1))
+			goto end;
+
+		/*
+		 * We don't risk optimizing if the WHERE clause is volatile, either.
+		 */
+		if (contain_volatile_functions(whereClause))
+			goto end;
+
+		/*
+		 * In case of a successful attempt, replaces it with the correct condition
+		 */
+		if (IsA(je, JoinExpr))
+			((JoinExpr *)je)->quals = (Node *) const_var;
+
+		clauses = lappend(clauses, whereClause);
+
+		first_elem = false;
+		subselect->jointree->fromlist = list_delete_ptr(subselect->jointree->fromlist, lc);
+
+		end:
+			if (first_elem)
+				return NULL;
+	}
+
+	list_free(all_clauses);
+
+	/* We don't have any clauses for pull-up creation */
+	if (clauses == NIL)
+		return NULL;
+	else
+		/* We can easily combine clauses through AND operator because they are independent */
+		whereClause = list_length(clauses) > 1 ?
+							(Node *) makeBoolExpr(AND_EXPR, clauses, -1) :
+							(Node *) linitial(clauses);
+
 	subselect->jointree->quals = NULL;
 
 	/*
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ebc545e246..ab0d716a70 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -812,6 +812,157 @@ where exists (
       from text_tbl ) ss
   where road.name = ss.f1 );
 rollback;
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tb (actual rows=1 loops=1)
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tc (actual rows=1 loops=1)
+(9 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Seq Scan on ta (actual rows=2 loops=1)
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join (actual rows=1 loops=2)
+           Join Filter: (ta.id = tc.id)
+           Rows Removed by Join Filter: 2
+           ->  Seq Scan on tc (actual rows=1 loops=2)
+           ->  Materialize (actual rows=2 loops=2)
+                 Storage: Memory  Maximum Storage: 17kB
+                 ->  Seq Scan on tb (actual rows=4 loops=1)
+(10 rows)
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 6ed3636a9e..bd842d627c 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -439,6 +439,71 @@ where exists (
 
 rollback;
 
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --

Alena Rybakina

a.rybakina@postgrespro.ru

about 1 year ago

In reply to: Ranier Vilela (#2)

Re: Exists pull-up application with JoinExpr

On 24.12.2024 13:25, Ranier Vilela wrote:

Hi Alena.

Em ter., 24 de dez. de 2024 às 01:44, Alena Rybakina
<a.rybakina@postgrespro.ru> escreveu:

Hi, hackers!

I found one pull-up that works if the inner join condition is
written through the where condition,

|create temp table ta (id int primary key, val int); insert into
ta values(1,1); insert into ta values(2,2); ||insert into ta values(3,3);|

|create temp table tb (id int primary key, aval int); insert into
tb values(4,1); insert into tb values(5,1); insert into tb
values(1,2); create temp table tc (id int primary key, aid int);
insert into tc values(6,1); insert into tc values(7,2);|

|EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT *
FROM ta WHERE EXISTS (SELECT * FROM tb, tc WHERE ta.id
<http://ta.id> = tb.id <http://tb.id>);|
QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
Buffers: local hit=6
-> Seq Scan on ta (actual rows=3 loops=1)
Buffers: local hit=1
-> Nested Loop (actual rows=0 loops=3)
Buffers: local hit=5
-> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
Index Cond: (id =ta.id <http://ta.id>)
Heap Fetches: 1
Buffers: local hit=4
-> Seq Scan on tc (actual rows=1 loops=1)
Buffers: local hit=1
Planning:
Buffers: shared hit=67 read=12
(14 rows)

but it doesn't work if it is written through the outside condition.

|alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING
OFF) SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb JOIN tc ON
ta.id <http://ta.id> = tb.id <http://tb.id>); QUERY PLAN
------------------------------------------------------ Seq Scan on
ta (actual rows=1 loops=1) Filter: EXISTS(SubPlan 1) Rows Removed
by Filter: 2 Buffers: local hit=5 SubPlan 1 -> Nested Loop (actual
rows=0 loops=3) Buffers: local hit=4 -> Seq Scan on tb (actual
rows=0 loops=3) Filter: (ta.id <http://ta.id> = id) Rows Removed
by Filter: 3 Buffers: local hit=3 -> Seq Scan on tc (actual rows=1
loops=1) Buffers: local hit=1 Planning: Buffers: shared hit=16
read=9 (15 rows) |

|I have written a patch to add this functionality and now it gives
an query plan: |

|alena@postgres=# EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING
OFF)
SELECT *
   FROM ta
WHERE EXISTS (SELECT *
                  FROM tb JOIN tc
                  ON ta.id <http://ta.id> = tb.id <http://tb.id>);
                     QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=1 loops=1)
   Buffers: local hit=6
   -> Seq Scan on ta (actual rows=3 loops=1)
         Buffers: local hit=1
   -> Nested Loop (actual rows=0 loops=3)
         Buffers: local hit=5
         -> Index Only Scan using tb_pkey on tb (actual rows=0
loops=3)
               Index Cond: (id = ta.id <http://ta.id>)
               Heap Fetches: 1
               Buffers: local hit=4
         -> Seq Scan on tc (actual rows=1 loops=1)
               Buffers: local hit=1
(12 rows)|

tb and tc form a Cartesian product, but in the case of the
intersection condition with tuples from the table ta (ta.id
<http://ta.id> = tb.id <http://tb.id>). So, according to the join
condition, tb intersects only with 1, and only it gets into the
result, but at the same time they appear twice - this is because
of the Cartesian product of tb with tc

|*How it works:*
|

I rewrote the code a bit so that it considers not only the quals
in jointree->quals, but also those in join expression
(subselect->jointree->fromlist). If they satisfy the conditions
for using pull up, I add them to the list of clauses and form a
"Bool" expression from them, joined by an "AND" operation.

I took a look at this patch and I did a little polishing on it.

And I believe that in testing, you need to set it to BUFFERS OFF,
because of the recent change made to ANALYZE.

The tests are failing, like this:
QUERY PLAN
-------------------------------------------------------------------------
Nested Loop Semi Join (actual rows=2 loops=1)
+ Buffers: local hit=7
-> Seq Scan on ta (actual rows=2 loops=1)
+ Buffers: local hit=1
-> Nested Loop (actual rows=1 loops=2)
+ Buffers: local hit=6
-> Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
Index Cond: (id = ta.id <http://ta.id>)
Heap Fetches: 2
+ Buffers: local hit=4
-> Seq Scan on tc (actual rows=1 loops=2)
-(7 rows)
+ Buffers: local hit=2
+(12 rows)

Yes, you are right) Thank you for your interest in this thread)

--
Regards,
Alena Rybakina
Postgres Professional

Ilia Evdokimov

ilya.evdokimov@tantorlabs.com

about 1 year ago

In reply to: Alena Rybakina (#3)

Re: Exists pull-up application with JoinExpr

Hi Alena,

Thank you for your work on subqueries with JOIN.

Have you considered the scenario where the subquery includes a qual like
(tc.aid = 1)? When I tried executing the queries below, I received different
results. In my opinion, to prevent this, we should add filters for such
quals within the loop 'foreach (lc, all_clauses)'.

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM ta
WHERE EXISTS (SELECT * FROM tb, tc WHERE ta.id = tb.id AND tc.aid = 1);
                              QUERY PLAN
----------------------------------------------------------------------
Hash Join (actual rows=1 loops=1)
   Hash Cond: (ta.id = tb.id)
   Buffers: local hit=3
   -> Seq Scan on ta (actual rows=3 loops=1)
         Buffers: local hit=1
   -> Hash (actual rows=3 loops=1)
         Buckets: 4096 Batches: 1 Memory Usage: 33kB
         Buffers: local hit=2
         -> HashAggregate (actual rows=3 loops=1)
               Group Key: tb.id
               Batches: 1 Memory Usage: 121kB
               Buffers: local hit=2
               -> Nested Loop (actual rows=3 loops=1)
                     Buffers: local hit=2
                     -> Seq Scan on tb (actual rows=3 loops=1)
                           Buffers: local hit=1
                     -> Materialize (actual rows=1 loops=3)
                           Storage: Memory Maximum Storage: 17kB
                           Buffers: local hit=1
                           -> Seq Scan on tc (actual rows=1 loops=1)
                                 Filter: (aid = 1)
                                 Rows Removed by Filter: 1
                                 Buffers: local hit=1
(23 rows)

============================

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb JOIN tc ON ta.id =
tb.id WHERE tc.aid = 1);
                                QUERY PLAN
---------------------------------------------------------------------------
Seq Scan on ta (actual rows=1 loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 2
   Buffers: local hit=6
   SubPlan 1
     -> Nested Loop (actual rows=0 loops=3)
           Buffers: local hit=5
           -> Index Only Scan using tb_pkey on tb (actual rows=0 loops=3)
                 Index Cond: (id = ta.id)
                 Heap Fetches: 1
                 Buffers: local hit=4
           -> Seq Scan on tc (actual rows=1 loops=1)
                 Filter: (aid = 1)
                 Buffers: local hit=1
(14 rows)

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.

Alena Rybakina

a.rybakina@postgrespro.ru

about 1 year ago

In reply to: Ilia Evdokimov (#4)

Re: Exists pull-up application with JoinExpr

Hi! Thank you for your interest in this subject!

On 27.12.2024 15:53, Ilia Evdokimov wrote:

Hi Alena,

Thank you for your work on subqueries with JOIN.

Have you considered the scenario where in subquery includes a qual
like (tc.aid = 1)? When I tried executing those queries I receive
different results. In my opinion, to prevent this, we should add
filters for such quals within the loop 'foreach (lc, all_clauses)'

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM ta
WHERE EXISTS (SELECT * FROM tb, tc WHERE ta.id = tb.id AND tc.aid = 1);
                              QUERY PLAN
----------------------------------------------------------------------
Hash Join (actual rows=1 loops=1)
   Hash Cond: (ta.id = tb.id)
   Buffers: local hit=3
   -> Seq Scan on ta (actual rows=3 loops=1)
         Buffers: local hit=1
   -> Hash (actual rows=3 loops=1)
         Buckets: 4096 Batches: 1 Memory Usage: 33kB
         Buffers: local hit=2
         -> HashAggregate (actual rows=3 loops=1)
               Group Key: tb.id
               Batches: 1 Memory Usage: 121kB
               Buffers: local hit=2
               -> Nested Loop (actual rows=3 loops=1)
                     Buffers: local hit=2
                     -> Seq Scan on tb (actual rows=3 loops=1)
                           Buffers: local hit=1
                     -> Materialize (actual rows=1 loops=3)
                           Storage: Memory Maximum Storage: 17kB
                           Buffers: local hit=1
                           -> Seq Scan on tc (actual rows=1 loops=1)
                                 Filter: (aid = 1)
                                 Rows Removed by Filter: 1
                                 Buffers: local hit=1
(23 rows)

============================

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb JOIN tc ON ta.id =
tb.id WHERE tc.aid = 1);
                                QUERY PLAN
---------------------------------------------------------------------------

Seq Scan on ta (actual rows=1 loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 2
   Buffers: local hit=6
   SubPlan 1
     -> Nested Loop (actual rows=0 loops=3)
           Buffers: local hit=5
           -> Index Only Scan using tb_pkey on tb (actual rows=0
loops=3)
                 Index Cond: (id = ta.id)
                 Heap Fetches: 1
                 Buffers: local hit=4
           -> Seq Scan on tc (actual rows=1 loops=1)
                 Filter: (aid = 1)
                 Buffers: local hit=1
(14 rows)

You are right: at the moment the subquery is not pulled up if it contains a
qual comparing with a constant (like t1.x1 = 1 in the example below), and
this problem is not specific to the current patch.

For example, you get the following query plan if you run this query
on master:

create table t (x int);
create table t1 (x1 int);
create table t2 (x2 int);
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT 1
FROM t
WHERE EXISTS (SELECT 1
FROM t1
where t1.x1 = 1);
QUERY PLAN
------------------------------------------------
Result (actual rows=0 loops=1)
One-Time Filter: (InitPlan 1).col1
InitPlan 1
-> Seq Scan on t1 (actual rows=0 loops=1)
Filter: (x1 = 1)
-> Seq Scan on t (never executed)
(6 rows)

It's all because of the check in this function - this qual has levelsup
= 0, not 1 (see !contain_vars_of_level(whereClause, 1)), but I have already
found out that changing this check requires somewhat more complicated
logic there. At the moment, I'm working on adding this processing to the
patch.

Thanks for the case!

--
Regards,
Alena Rybakina
Postgres Professional

Alena Rybakina

a.rybakina@postgrespro.ru

12 months ago

In reply to: Alena Rybakina (#5)

1 attachment(s)

Re: Exists pull-up application with JoinExpr

Hi! I have solved it.

On 30.12.2024 11:24, Alena Rybakina wrote:

Hi! Thank you for your interest to this subject!

On 27.12.2024 15:53, Ilia Evdokimov wrote:

Hi Alena,

Thank you for your work on subqueries with JOIN.

Have you considered the scenario where in subquery includes a qual
like (tc.aid = 1)? When I tried executing those queries I receive
different results. In my opinion, to prevent this, we should add
filters for such quals within the loop 'foreach (lc, all_clauses)'

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM ta
WHERE EXISTS (SELECT * FROM tb, tc WHERE ta.id = tb.id AND tc.aid = 1);
                              QUERY PLAN
----------------------------------------------------------------------
Hash Join (actual rows=1 loops=1)
   Hash Cond: (ta.id = tb.id)
   Buffers: local hit=3
   -> Seq Scan on ta (actual rows=3 loops=1)
         Buffers: local hit=1
   -> Hash (actual rows=3 loops=1)
         Buckets: 4096 Batches: 1 Memory Usage: 33kB
         Buffers: local hit=2
         -> HashAggregate (actual rows=3 loops=1)
               Group Key: tb.id
               Batches: 1 Memory Usage: 121kB
               Buffers: local hit=2
               -> Nested Loop (actual rows=3 loops=1)
                     Buffers: local hit=2
                     -> Seq Scan on tb (actual rows=3 loops=1)
                           Buffers: local hit=1
                     -> Materialize (actual rows=1 loops=3)
                           Storage: Memory Maximum Storage: 17kB
                           Buffers: local hit=1
                           -> Seq Scan on tc (actual rows=1 loops=1)
                                 Filter: (aid = 1)
                                 Rows Removed by Filter: 1
                                 Buffers: local hit=1
(23 rows)

============================

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT * FROM ta WHERE EXISTS (SELECT * FROM tb JOIN tc ON ta.id =
tb.id WHERE tc.aid = 1);
                                QUERY PLAN
---------------------------------------------------------------------------

Seq Scan on ta (actual rows=1 loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 2
   Buffers: local hit=6
   SubPlan 1
     -> Nested Loop (actual rows=0 loops=3)
           Buffers: local hit=5
           -> Index Only Scan using tb_pkey on tb (actual rows=0
loops=3)
                 Index Cond: (id = ta.id)
                 Heap Fetches: 1
                 Buffers: local hit=4
           -> Seq Scan on tc (actual rows=1 loops=1)
                 Filter: (aid = 1)
                 Buffers: local hit=1
(14 rows)

You are right, at the moment the code is not processed if there is a
constant qual in the subquery (like t1.x1=1 in the example below) and
this problem is not only related to the current patch.

For example you can get such a query plan if you complete this request
to the master:

create table t (x int);
create table t1 (x1 int);
create table t2 (x2 int);
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
SELECT 1
FROM t
WHERE EXISTS (SELECT 1
FROM t1
where t1.x1 = 1);
QUERY PLAN
------------------------------------------------
Result (actual rows=0 loops=1)
One-Time Filter: (InitPlan 1).col1
InitPlan 1
-> Seq Scan on t1 (actual rows=0 loops=1)
Filter: (x1 = 1)
-> Seq Scan on t (never executed)
(6 rows)

It's all because of the check in this function - this qual has
levelsoup = 0, not 1 (see (!contain_vars_of_level(whereClause, 1)),
but I already found out that by changing this, the logic of correction
there is required a little more complicated. At the moment, I'm
working to add this processing to the patch.

Thanks for the case!

The logic is the same, but extended to constants. I added a few more
tests that not only cover this case, but also NOT EXISTS, which will be
converted to ANTI JOIN.

--
Regards,
Alena Rybakina
Postgres Professional

Attachments:

v2-0001-Teach-the-planner-to-convert-EXISTS-and-NOT-EXISTS-s.patchtext/x-patch; charset=UTF-8; name=v2-0001-Teach-the-planner-to-convert-EXISTS-and-NOT-EXISTS-s.patchDownload

From b911333078fad71d4509adab1b0473828409b000 Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Tue, 12 Nov 2024 12:16:42 +0300
Subject: [PATCH] Teach the planner to convert EXISTS and NOT EXISTS subqueries
 into semi and anti joins, respectively, if subquery's join expressions are
 independent and vars have the level no more than the parent. In addition, the
 transformation will be allowed if the expressions are constant. To do this, we
 put all potential expressions from the qual list and join list into the
 common list and check each expression one by one to see if they are suitable
 for transformation. In particular, we need to increment the level of
 expressions' vars to the parent query level. We consider expressions only for
 INNER JOIN type of join in subquery, otherwise the transformation is not
 available.
 
 Authors: Alena Rybakina <lena.ribackina@yandex.ru>
 Reviewed-by: Ranier Vilela <ranier.vf@gmail.com>, Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>

---
 src/backend/optimizer/plan/subselect.c  | 135 ++++++++++----
 src/test/regress/expected/subselect.out | 232 ++++++++++++++++++++++++
 src/test/regress/sql/subselect.sql      |  99 ++++++++++
 3 files changed, 433 insertions(+), 33 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index eaaf8c1b49a..957d6cd36be 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1376,6 +1376,18 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	ListCell *lc;
+	List *clauses = NIL;
+	List *all_clauses = NIL;
+	int first_elem = true;
+	Const *const_var = makeConst(BOOLOID,
+									-1,
+									InvalidOid,
+									sizeof(bool),
+									(Datum) 1,
+									false,
+									true);
+
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1403,40 +1415,15 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * with noplace to evaluate the targetlist.
 	 */
 	if (!simplify_EXISTS_query(root, subselect))
-		return NULL;
+			return NULL;
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
+	if (subselect->jointree->quals)
+		all_clauses = lappend(all_clauses, subselect->jointree->quals);
 
-	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
-
-	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
-	 */
-	if (!contain_vars_of_level(whereClause, 1))
-		return NULL;
-
-	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
-	 */
-	if (contain_volatile_functions(whereClause))
-		return NULL;
+	subselect->jointree->quals = NULL;
 
-	/*
-	 * The subquery must have a nonempty jointree, but we can make it so.
-	 */
-	replace_empty_jointree(subselect);
+	/* Gather all clauses in main list for the further consideration */
+	all_clauses = list_concat(all_clauses, subselect->jointree->fromlist);
 
 	/*
 	 * Prepare to pull up the sub-select into top range table.
@@ -1455,7 +1442,90 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 */
 	rtoffset = list_length(parse->rtable);
 	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
+
+	/*
+	 * We will be able to remove top-level plain JOIN/ON clauses if they are not outer joins.
+	 */
+	foreach (lc, all_clauses)
+	{
+		Node *je = ((Node *) lfirst(lc));
+		bool const_type_expr = false;
+
+		whereClause = je;
+
+		if (IsA(je, RangeTblRef))
+			goto end;
+
+		if ((IsA(je, JoinExpr) && ((JoinExpr *)je)->jointype != JOIN_INNER))
+			goto end;
+
+		if (IsA(je, JoinExpr) && ((JoinExpr *)je)->quals != NULL)
+			whereClause = ((JoinExpr *)je)->quals;
+
+		if (IsA(whereClause, OpExpr) &&
+		   (IsA(get_rightop(whereClause), Const) || IsA(get_leftop(whereClause), Const)))
+			const_type_expr = true;
+
+		/*
+		* On the other hand, the WHERE clause must contain some Vars of the
+		* parent query, else it's not gonna be a join.
+		*/
+		if (!const_type_expr && !contain_vars_of_level(whereClause, 1))
+			goto end;
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+			goto end;
+
+		/*
+		 * In case of a successful attempt, replaces it with the correct condition
+		 */
+		if (IsA(je, JoinExpr))
+			((JoinExpr *)je)->quals = (Node *) const_var;
+
+		if(!const_type_expr || (const_type_expr && contain_vars_of_level(whereClause, 1)))
+		{
+			OffsetVarNodes(whereClause, rtoffset, 0);
+			IncrementVarSublevelsUp(whereClause, -1, 1);
+		}
+
+		clauses = lappend(clauses, whereClause);
+
+		first_elem = false;
+		subselect->jointree->fromlist = list_delete_ptr(subselect->jointree->fromlist, lc);
+
+		end:
+			if (first_elem)
+				return NULL;
+	}
+
+	list_free(all_clauses);
+
+	/* We don't have any clauses for pull-up creation */
+	if (clauses == NIL)
+		return NULL;
+	else
+		/* We can easily combine clauses through AND operator because they are independent */
+		whereClause = list_length(clauses) > 1 ?
+							(Node *) makeBoolExpr(AND_EXPR, clauses, -1) :
+							(Node *) linitial(clauses);
+
+
+	subselect->jointree->quals = NULL;
+
+	/*
+	 * The rest of the sub-select must not refer to any Vars of the parent
+	 * query.  (Vars of higher levels should be okay, though.)
+	 */
+	if (contain_vars_of_level((Node *) subselect, 1))
+		return NULL;
+
+	/*
+	 * The subquery must have a nonempty jointree, but we can make it so.
+	 */
+	replace_empty_jointree(subselect);
 
 	/*
 	 * Upper-level vars in subquery will now be one level closer to their
@@ -1463,7 +1533,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * becomes level zero.
 	 */
 	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
 
 	/*
 	 * Now that the WHERE clause is adjusted to match the parent query
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ebc545e2461..f6dbe99df90 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -812,6 +812,238 @@ where exists (
       from text_tbl ) ss
   where road.name = ss.f1 );
 rollback;
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tb.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tb.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tc (actual rows=1 loops=1)
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Hash Right Anti Join (actual rows=1 loops=1)
+   Hash Cond: (tb.id = ta.id)
+   Join Filter: (ta.id = 1)
+   Rows Removed by Join Filter: 2
+   ->  Nested Loop (actual rows=8 loops=1)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Materialize (actual rows=2 loops=4)
+               Storage: Memory  Maximum Storage: 17kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(12 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tb (actual rows=1 loops=1)
+(9 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Seq Scan on ta (actual rows=2 loops=1)
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join (actual rows=1 loops=2)
+           Join Filter: (ta.id = tc.id)
+           Rows Removed by Join Filter: 2
+           ->  Seq Scan on tc (actual rows=1 loops=2)
+           ->  Materialize (actual rows=2 loops=2)
+                 Storage: Memory  Maximum Storage: 17kB
+                 ->  Seq Scan on tb (actual rows=4 loops=1)
+(10 rows)
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 6ed3636a9e4..13d7066a823 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -439,6 +439,105 @@ where exists (
 
 rollback;
 
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
-- 
2.34.1

Alena Rybakina

a.rybakina@postgrespro.ru

11 months ago

In reply to: Alena Rybakina (#6)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi! I found another example where the transformation worked incorrectly
and reconsidered the idea.

As in the exists_sublink_to_ANY conversion, we need to get the flattened
implicit-AND list of clauses and pull out the chunks of the WHERE clause
that belong to the parent query,
since we are called halfway through the parent's preprocess_expression()
and the earlier steps of preprocess_expression() wouldn't get applied to the
pulled-up stuff unless we do them here.
We also process vars differently depending on which side the var is
on: if it's in a subquery, we only need to lower its level (varlevel)
because the subquery will be flattened, while
for the other vars, which belong to the parent query, we need to do the
preparation for pulling up the sub-select into the top range table.

Expressions that we couldn't assign to either list are collected in
newWhere, and both adjustments are applied to them.

--
Regards,
Alena Rybakina
Postgres Professional

Attachments:

v3-0001-Add-appling-the-pull-up-transformation-for-expressio.patchtext/x-patch; charset=UTF-8; name=v3-0001-Add-appling-the-pull-up-transformation-for-expressio.patchDownload

From 9f42dc0bbe1899751892a67355fe447d917096a4 Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Mon, 27 Jan 2025 22:31:47 +0300
Subject: [PATCH] Add applying the pull-up transformation for expressions. We
 examine all expressions and need to increase the varno of vars which belong
 to the subquery, because they will belong to another part of the query. But
 we need to decrease the sublevel number for vars which belong to the upper level
 of the query because the subquery will be flattened, so they will have the same
 sublevel number. That's why we separate such vars into leftargs and rightargs
 lists. The leftargs list contains vars belonging to the upper-level part of the query,
 rightargs contains vars belonging to the subquery. The newWhere list contains
 expressions for which we can't determine which part of the query they belong to,
 probably constants from the subquery.

---
 src/backend/optimizer/plan/subselect.c        | 259 ++++++++++++++----
 src/test/regress/expected/partition_join.out  |  12 +-
 src/test/regress/expected/subselect.out       | 232 ++++++++++++++++
 src/test/regress/expected/updatable_views.out |  10 +-
 src/test/regress/sql/subselect.sql            |  99 +++++++
 5 files changed, 550 insertions(+), 62 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 8230cbea3c3..afa97cf3c5c 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1378,6 +1378,16 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	List *testlist;
+	ListCell *lc;
+	List *newWhere = NIL;
+	List *opcollations;
+	List *opids;
+	List *rightargs;
+	List *leftargs;
+	List *all_clauses = NIL;
+	ListCell *rc, *oc, *cc;
+
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1390,7 +1400,9 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * get flattened into the parent query.
 	 */
 	if (subselect->cteList)
-		return NULL;
+	{
+			return NULL;
+	}
 
 	/*
 	 * Copy the subquery so we can modify it safely (see comments in
@@ -1405,35 +1417,171 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * with noplace to evaluate the targetlist.
 	 */
 	if (!simplify_EXISTS_query(root, subselect))
-		return NULL;
+	{
+			return NULL;
+	}
+
+	if (subselect->jointree->quals)
+		all_clauses = lappend(all_clauses, subselect->jointree->quals);
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
 	subselect->jointree->quals = NULL;
 
+	/* Gather all clauses in main list for the further consideration */
+	all_clauses = list_concat(all_clauses, subselect->jointree->fromlist);
+
+	leftargs = rightargs = opids = opcollations = newWhere = NIL;
+
 	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
+	 * We will able to remove top-level plain JOIN/ON clauses if they are not outer join.
 	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
+	foreach (lc, all_clauses)
+	{
+		Node *je = ((Node *) lfirst(lc));
+		ListCell *lc1;
+
+		whereClause = copyObject(je);
+
+		if (IsA(whereClause, RangeTblRef))
+			continue;
+
+		if ((IsA(whereClause, JoinExpr) && ((JoinExpr *)whereClause)->jointype != JOIN_INNER))
+			continue;
+
+		if (IsA(whereClause, JoinExpr) && ((JoinExpr *)whereClause)->quals != NULL)
+			whereClause = ((JoinExpr *)whereClause)->quals;
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+		{
+			return NULL;
+		}
+
+		/*
+		* Clean up the WHERE clause by doing const-simplification etc on it.
+		* Aside from simplifying the processing we're about to do, this is
+		* important for being able to pull chunks of the WHERE clause up into the
+		* parent query.  Since we are invoked partway through the parent's
+		* preprocess_expression() work, earlier steps of preprocess_expression()
+		* wouldn't get applied to the pulled-up stuff unless we do them here. For
+		* the parts of the WHERE clause that get put back into the child query,
+		* this work is partially duplicative, but it shouldn't hurt.
+		*
+		* Note: we do not run flatten_join_alias_vars.  This is OK because any
+		* parent aliases were flattened already, and we're not going to pull any
+		* child Vars (of any description) into the parent.
+		*
+		* Note: passing the parent's root to eval_const_expressions is
+		* technically wrong, but we can get away with it since only the
+		* boundParams (if any) are used, and those would be the same in a
+		* subroot.
+		*/
+		whereClause = eval_const_expressions(root, whereClause);
+		whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
+		whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
+
+		foreach(lc1, (List *) whereClause)
+		{
+			OpExpr	   *expr = (OpExpr *) lfirst(lc1);
+
+			if (IsA(expr, OpExpr))
+			{
+
+				Node	   *leftarg = (Node *) linitial(expr->args);
+				Node	   *rightarg = (Node *) lsecond(expr->args);
+
+				leftarg =(Node *) eval_const_expressions(root, leftarg);
+				rightarg =(Node *) eval_const_expressions(root, rightarg);
+
+				/*
+				 * We need to separate outer and inner vars.
+				 * Since outer var will need to be level up only its
+				 * its sublevel number yet inner var will need to be
+				 * updated.
+				 * Their varno will be increased due to elimination of
+				 * subquery and consequently rebuilding query tree.
+				 * Later expressions will be formed again and for this
+				 * reason their operate id and collacations need to be
+				 * saved too.
+				 * newWhere list contains inner constant expressions.
+				*/
+				if (contain_vars_of_level(leftarg, 1))
+				{
+					leftargs = lappend(leftargs, leftarg);
+					rightargs = lappend(rightargs, rightarg);
+					opids = lappend_oid(opids, expr->opno);
+					opcollations = lappend_oid(opcollations, expr->inputcollid);
+					continue;
+				}
+				else if (contain_vars_of_level(rightarg, 1))
+				{
+					expr->opno = get_commutator(expr->opno);
+					if (OidIsValid(expr->opno))
+					{
+						leftargs = lappend(leftargs, rightarg);
+						rightargs = lappend(rightargs, leftarg);
+						opids = lappend_oid(opids, expr->opno);
+						opcollations = lappend_oid(opcollations, expr->inputcollid);
+						continue;
+					}
+					/* If no commutator, no chance to optimize the WHERE clause */
+					Assert(0);
+				}
+				newWhere = lappend(newWhere, expr);
+			}
+			/* We only need to be sure that it contains some var which
+			 * is acceptable for pull-up application.
+			*/
+			else if (contain_vars_of_level((Node *) expr, 1))
+				newWhere = lappend(newWhere, expr);
+			else
+				return NULL;
+		}
+
+		/*
+		 * In case of a successful attempt, replaces it with the correct condition.
+		 * We were sure that inner relations are independent, so we confidently
+		 * can replace their join condition on true.
+		 * ToDo: operate connected inner join expressions.
+		 */
+		if ((List *) whereClause != NIL && IsA(je, JoinExpr))
+			((JoinExpr *)je)->quals = (Node *) makeConst(BOOLOID,
+														-1,
+														InvalidOid,
+														sizeof(bool),
+														(Datum) 1,
+														false,
+														true);
+	}
+
+	list_free(all_clauses);
+
+	/* We don't have any clauses for pull-up creation */
+	if (newWhere == NIL && leftargs== NIL)
+	{
 		return NULL;
+	}
 
 	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
+	 * And there can't be any child Vars in the stuff we intend to pull up.
+	 * (Note: we'd need to check for child Aggs too, except we know the child
+	 * has no aggs at all because of simplify_EXISTS_query's check. The same
+	 * goes for window functions.)
 	 */
-	if (!contain_vars_of_level(whereClause, 1))
+	if (contain_vars_of_level((Node *) subselect, 1))
 		return NULL;
 
 	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
+	 * Prepare to pull up the sub-select into top range table.
+	 *
+	 * We need to adjust all level-zero varnos in the subquery
+	 * to account for the rtable merger.
 	 */
-	if (contain_volatile_functions(whereClause))
-		return NULL;
+	rtoffset = list_length(parse->rtable);
+	OffsetVarNodes((Node *) subselect, rtoffset, 0);
+	OffsetVarNodes((Node *)newWhere, rtoffset, 0);
+	OffsetVarNodes((Node *)rightargs, rtoffset, 0);
 
 	/*
 	 * The subquery must have a nonempty jointree, but we can make it so.
@@ -1441,23 +1589,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	replace_empty_jointree(subselect);
 
 	/*
-	 * Prepare to pull up the sub-select into top range table.
-	 *
-	 * We rely here on the assumption that the outer query has no references
-	 * to the inner (necessarily true). Therefore this is a lot easier than
-	 * what pull_up_subqueries has to go through.
-	 *
-	 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
-	 * do.  The machinations of simplify_EXISTS_query ensured that there is
-	 * nothing interesting in the subquery except an rtable and jointree, and
-	 * even the jointree FromExpr no longer has quals.  So we can just append
-	 * the rtable to our own and use the FromExpr in our jointree. But first,
-	 * adjust all level-zero varnos in the subquery to account for the rtable
-	 * merger.
+	 * Also reject sublinks in the stuff we intend to pull up.  (It might be
+	 * possible to support this, but doesn't seem worth the complication.)
 	 */
-	rtoffset = list_length(parse->rtable);
-	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
+	if (contain_subplans((Node *) leftargs) || contain_subplans((Node *) rightargs))
+		return NULL;
 
 	/*
 	 * Upper-level vars in subquery will now be one level closer to their
@@ -1465,31 +1601,38 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * becomes level zero.
 	 */
 	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
+	IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
+	IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
 
 	/*
 	 * Now that the WHERE clause is adjusted to match the parent query
 	 * environment, we can easily identify all the level-zero rels it uses.
 	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
 	 * not.
+	 * XXX: Should we check newWhere?
 	 */
-	clause_varnos = pull_varnos(root, whereClause);
-	upper_varnos = NULL;
-	varno = -1;
-	while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+	if(leftargs != NIL)
 	{
-		if (varno <= rtoffset)
-			upper_varnos = bms_add_member(upper_varnos, varno);
-	}
-	bms_free(clause_varnos);
-	Assert(!bms_is_empty(upper_varnos));
+		clause_varnos = pull_varnos(root, (Node *) leftargs);
+		upper_varnos = NULL;
+		varno = -1;
+		while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+		{
+			if (varno <= rtoffset)
+				upper_varnos = bms_add_member(upper_varnos, varno);
+		}
+		bms_free(clause_varnos);
 
-	/*
-	 * Now that we've got the set of upper-level varnos, we can make the last
-	 * check: only available_rels can be referenced.
-	 */
-	if (!bms_is_subset(upper_varnos, available_rels))
-		return NULL;
+		Assert(!bms_is_empty(upper_varnos) || bms_is_empty(clause_varnos));
+
+
+		/*
+		* Now that we've got the set of upper-level varnos, we can make the last
+		* check: only available_rels can be referenced.
+		*/
+		if (!bms_is_subset(upper_varnos, available_rels) && !bms_is_empty(clause_varnos))
+			return NULL;
+	}
 
 	/*
 	 * Now we can attach the modified subquery rtable to the parent. This also
@@ -1498,6 +1641,22 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	CombineRangeTables(&parse->rtable, &parse->rteperminfos,
 					   subselect->rtable, subselect->rteperminfos);
 
+	testlist = NIL;
+	forfour(lc, leftargs, rc, rightargs, oc, opids, cc, opcollations)
+	{
+		Node	   *leftarg = (Node *) lfirst(lc);
+		Node	   *rightarg = (Node *) lfirst(rc);
+		Oid			opid = lfirst_oid(oc);
+		Oid			opcollation = lfirst_oid(cc);
+
+		testlist = lappend(testlist,
+							(Node *) make_opclause(opid, BOOLOID, false,
+											(Expr *) leftarg, (Expr *) rightarg,
+											InvalidOid, opcollation));
+	}
+
+	testlist = list_concat(testlist, newWhere);
+
 	/*
 	 * And finally, build the JoinExpr node.
 	 */
@@ -1512,7 +1671,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 		result->rarg = (Node *) subselect->jointree;
 	result->usingClause = NIL;
 	result->join_using_alias = NULL;
-	result->quals = whereClause;
+	result->quals = (Node *) make_ands_explicit(testlist);
 	result->alias = NULL;
 	result->rtindex = 0;		/* we don't need an RTE for it */
 
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index af468682a2d..8cb4cda388e 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2492,8 +2492,8 @@ where not exists (select 1 from prtx2
          ->  Seq Scan on prtx1_1
                Filter: ((a < 20) AND (c = 120))
          ->  Bitmap Heap Scan on prtx2_1
-               Recheck Cond: ((b = prtx1_1.b) AND (c = 123))
-               Filter: (a = prtx1_1.a)
+               Recheck Cond: ((prtx1_1.b = b) AND (c = 123))
+               Filter: (prtx1_1.a = a)
                ->  BitmapAnd
                      ->  Bitmap Index Scan on prtx2_1_b_idx
                            Index Cond: (b = prtx1_1.b)
@@ -2503,8 +2503,8 @@ where not exists (select 1 from prtx2
          ->  Seq Scan on prtx1_2
                Filter: ((a < 20) AND (c = 120))
          ->  Bitmap Heap Scan on prtx2_2
-               Recheck Cond: ((b = prtx1_2.b) AND (c = 123))
-               Filter: (a = prtx1_2.a)
+               Recheck Cond: ((prtx1_2.b = b) AND (c = 123))
+               Filter: (prtx1_2.a = a)
                ->  BitmapAnd
                      ->  Bitmap Index Scan on prtx2_2_b_idx
                            Index Cond: (b = prtx1_2.b)
@@ -2534,7 +2534,7 @@ where not exists (select 1 from prtx2
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_1
                Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
-               Filter: (a = prtx1_1.a)
+               Filter: (prtx1_1.a = a)
                ->  BitmapOr
                      ->  Bitmap Index Scan on prtx2_1_c_idx
                            Index Cond: (c = 99)
@@ -2545,7 +2545,7 @@ where not exists (select 1 from prtx2
                Filter: ((a < 20) AND (c = 91))
          ->  Bitmap Heap Scan on prtx2_2
                Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
-               Filter: (a = prtx1_2.a)
+               Filter: (prtx1_2.a = a)
                ->  BitmapOr
                      ->  Bitmap Index Scan on prtx2_2_c_idx
                            Index Cond: (c = 99)
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ebc545e2461..cff5026887f 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -812,6 +812,238 @@ where exists (
       from text_tbl ) ss
   where road.name = ss.f1 );
 rollback;
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=2 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tb (actual rows=1 loops=2)
+(7 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Seq Scan on tc (actual rows=1 loops=2)
+(7 rows)
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Anti Join (actual rows=0 loops=1)
+   ->  Seq Scan on ta (actual rows=2 loops=1)
+   ->  Nested Loop (actual rows=1 loops=2)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=2)
+               Index Cond: (id = ta.id)
+               Heap Fetches: 2
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Hash Right Semi Join (actual rows=2 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join (actual rows=2 loops=1)
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Hash (actual rows=2 loops=1)
+               Buckets: 4096  Batches: 1  Memory Usage: 33kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(11 rows)
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tc_pkey on tc (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tb (actual rows=1 loops=1)
+(9 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Hash Right Anti Join (actual rows=1 loops=1)
+   Hash Cond: (tc.id = ta.id)
+   Join Filter: (ta.id = 1)
+   Rows Removed by Join Filter: 4
+   ->  Nested Loop (actual rows=8 loops=1)
+         ->  Seq Scan on tb (actual rows=4 loops=1)
+         ->  Materialize (actual rows=2 loops=4)
+               Storage: Memory  Maximum Storage: 17kB
+               ->  Seq Scan on tc (actual rows=2 loops=1)
+   ->  Hash (actual rows=2 loops=1)
+         Buckets: 4096  Batches: 1  Memory Usage: 33kB
+         ->  Seq Scan on ta (actual rows=2 loops=1)
+(12 rows)
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join (actual rows=1 loops=1)
+   ->  Index Only Scan using ta_pkey on ta (actual rows=1 loops=1)
+         Index Cond: (id = 1)
+         Heap Fetches: 1
+   ->  Nested Loop (actual rows=1 loops=1)
+         ->  Index Only Scan using tb_pkey on tb (actual rows=1 loops=1)
+               Index Cond: (id = 1)
+               Heap Fetches: 1
+         ->  Seq Scan on tc (actual rows=1 loops=1)
+(9 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Seq Scan on ta (actual rows=2 loops=1)
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join (actual rows=1 loops=2)
+           Join Filter: (ta.id = tc.id)
+           Rows Removed by Join Filter: 2
+           ->  Seq Scan on tc (actual rows=1 loops=2)
+           ->  Materialize (actual rows=2 loops=2)
+                 Storage: Memory  Maximum Storage: 17kB
+                 ->  Seq Scan on tb (actual rows=4 loops=1)
+(10 rows)
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..d0762c1299e 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
          One-Time Filter: ((InitPlan 1).col1 IS NOT TRUE)
  
  Update on base_tbl
-   InitPlan 1
-     ->  Index Only Scan using base_tbl_pkey on base_tbl t
-           Index Cond: (id = 2)
-   ->  Result
-         One-Time Filter: (InitPlan 1).col1
+   ->  Nested Loop Semi Join
          ->  Index Scan using base_tbl_pkey on base_tbl
                Index Cond: (id = 2)
-(15 rows)
+         ->  Index Scan using base_tbl_pkey on base_tbl t
+               Index Cond: (id = 2)
+(13 rows)
 
 INSERT INTO rw_view1 VALUES (2, 'New row 2');
 SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 6ed3636a9e4..13d7066a823 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -439,6 +439,105 @@ where exists (
 
 rollback;
 
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tb.id);
+
+-- Join compound expression
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = tb.id);
+
+-- Compound expression with const type
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE NOT EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON ta.id = tc.id and
+                       ta.id = 1);
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  JOIN tc
+                    ON tb.id = 1 and
+                       ta.id = 1);
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
+ SELECT 1
+   FROM ta
+  WHERE EXISTS (SELECT 1
+                  FROM tb
+                  RIGHT JOIN tc
+                    ON ta.id = tc.id);
+
 --
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
-- 
2.34.1

Ilia Evdokimov

ilya.evdokimov@tantorlabs.com

11 months ago

In reply to: Alena Rybakina (#7)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

On 09.02.2025 18:14, Alena Rybakina wrote:

Hi! I found another example where the transformation worked
incorrectly and reconsidered the idea.

As for conversion of exists_sublink_to_ANY, we need to get the
flattened implicit-AND list of clauses and pull out the chunks of the
WHERE clause that belong to the parent query,
since we are called halfway through the parent's
preprocess_expression() and earlier steps of preprocess_expression()
wouldn't get applied to the pulled-up stuff unless we do them here.
We also process Vars differently depending on which side the Var is
on: if it is in the subquery, we only need to lower its level (varlevel)
because the subquery will be flattened, while
for Vars that belong to the parent query, we need to prepare for
pulling up the sub-select into the top range table.

For those expressions that we couldn't assign to either list, we
define newWhere and apply both cases.

When I ran 'make -C contrib/ check', the tests of the postgres_fdw extension
failed (see the attached regression.diffs: LIMIT is no longer pushed down
into the remote query). I might be wrong, but you should be careful with LIMIT.

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.

Attachments:

regression.diffstext/plain; charset=UTF-8; name=regression.diffsDownload

diff -U3 /home/ilia/postgres/contrib/postgres_fdw/expected/postgres_fdw.out /home/ilia/postgres/contrib/postgres_fdw/results/postgres_fdw.out
--- /home/ilia/postgres/contrib/postgres_fdw/expected/postgres_fdw.out	2025-02-10 23:31:19.353059650 +0300
+++ /home/ilia/postgres/contrib/postgres_fdw/results/postgres_fdw.out	2025-02-10 23:46:43.249745683 +0300
@@ -4835,13 +4835,15 @@
 	  EXISTS (SELECT 1 FROM ft4 WHERE ft4.c2 = ft2.c2))
   AND ft2.c1 > 900
   ORDER BY ft2.c1 LIMIT 10;
-                                                                                                                                                          QUERY PLAN                                                                                                                                                           
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                                                                                                                     QUERY PLAN                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8
-   Relations: (public.ft2) SEMI JOIN ((public.ft2 ft2_1) SEMI JOIN (public.ft4))
-   Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8 FROM "S 1"."T 1" r1 WHERE ((r1."C 1" > 900)) AND EXISTS (SELECT NULL FROM "S 1"."T 1" r3 WHERE ((r1."C 1" = r3."C 1")) AND EXISTS (SELECT NULL FROM "S 1"."T 3" r4 WHERE ((r3.c2 = r4.c2)))) ORDER BY r1."C 1" ASC NULLS LAST LIMIT 10::bigint
-(4 rows)
+   ->  Foreign Scan
+         Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8
+         Relations: (public.ft2) SEMI JOIN ((public.ft2 ft2_1) SEMI JOIN (public.ft4))
+         Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8 FROM "S 1"."T 1" r1 WHERE ((r1."C 1" > 900)) AND EXISTS (SELECT NULL FROM "S 1"."T 1" r3 WHERE ((r1."C 1" = r3."C 1")) AND EXISTS (SELECT NULL FROM "S 1"."T 3" r4 WHERE ((r3.c2 = r4.c2)))) ORDER BY r1."C 1" ASC NULLS LAST
+(6 rows)
 
 SELECT ft2.* FROM ft2 WHERE
   c1 = ANY (
@@ -4871,13 +4873,20 @@
 	  EXISTS (SELECT 1 FROM ft4 WHERE ft4.c2 = ft2.c2) AND c1 = ftupper.c1 )
   AND ftupper.c1 > 900
   ORDER BY ftupper.c1 LIMIT 10;
-                                                                                                                                                          QUERY PLAN                                                                                                                                                           
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Foreign Scan
+                                                               QUERY PLAN                                                               
+----------------------------------------------------------------------------------------------------------------------------------------
+ Limit
    Output: ftupper.c1, ftupper.c2, ftupper.c3, ftupper.c4, ftupper.c5, ftupper.c6, ftupper.c7, ftupper.c8
-   Relations: (public.ft2 ftupper) SEMI JOIN ((public.ft2) SEMI JOIN (public.ft4))
-   Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8 FROM "S 1"."T 1" r1 WHERE ((r1."C 1" > 900)) AND EXISTS (SELECT NULL FROM "S 1"."T 1" r2 WHERE ((r1."C 1" = r2."C 1")) AND EXISTS (SELECT NULL FROM "S 1"."T 3" r3 WHERE ((r2.c2 = r3.c2)))) ORDER BY r1."C 1" ASC NULLS LAST LIMIT 10::bigint
-(4 rows)
+   ->  Foreign Scan on public.ft2 ftupper
+         Output: ftupper.c1, ftupper.c2, ftupper.c3, ftupper.c4, ftupper.c5, ftupper.c6, ftupper.c7, ftupper.c8
+         Filter: (ANY (ftupper.c1 = (hashed SubPlan 2).col1))
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" > 900)) ORDER BY "C 1" ASC NULLS LAST
+         SubPlan 2
+           ->  Foreign Scan
+                 Output: ft2.c1
+                 Relations: (public.ft2) SEMI JOIN (public.ft4)
+                 Remote SQL: SELECT r1."C 1" FROM "S 1"."T 1" r1 WHERE EXISTS (SELECT NULL FROM "S 1"."T 3" r2 WHERE ((r1.c2 = r2.c2)))
+(11 rows)
 
 SELECT * FROM ft2 ftupper WHERE
    EXISTS (

Alena Rybakina

a.rybakina@postgrespro.ru

11 months ago

In reply to: Ilia Evdokimov (#8)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

On 10.02.2025 23:51, Ilia Evdokimov wrote:

On 09.02.2025 18:14, Alena Rybakina wrote:

Hi! I found another example where the transformation worked
incorrectly and reconsidered the idea.

As for conversion of exists_sublink_to_ANY, we need to get the
flattened implicit-AND list of clauses and pull out the chunks of the
WHERE clause that belong to the parent query,
since we are called halfway through the parent's
preprocess_expression() and earlier steps of preprocess_expression()
wouldn't get applied to the pulled-up stuff unless we do them here.
We also process Vars differently depending on which side the Var
is on: if it is in the subquery, we only need to lower its level
(varlevel) because the subquery will be flattened, while
for Vars that belong to the parent query, we need to prepare for
pulling up the sub-select into the top range table.

For those expressions that we couldn't assign to either list, we
define newWhere and apply both cases.

When I run 'make -C contrib/ check', tests of postgres_fdw extension
failed. I might be wrong, but you should be careful with LIMIT.

Thank you for the review, I'm working on it.

--
Regards,
Alena Rybakina
Postgres Professional

#10

Alena Rybakina

a.rybakina@postgrespro.ru

9 months ago

In reply to: Alena Rybakina (#9)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi!

My colleague reviewed my patch and gave feedback on how to improve it:
for some queries with data types that I did not consider, pull-up is not
applied, although it should be. Some examples:

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
SELECT 1
FROM ta
WHERE EXISTS (SELECT 1
                 FROM tb
                 JOIN tc
                   ON ta.id = tb.id
                  AND tb.id = ANY('{1}'::int[])
              );

                                QUERY PLAN
--------------------------------------------------------------------------
Seq Scan on ta (actual rows=1.00 loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 1
   SubPlan 1
     -> Nested Loop (actual rows=0.50 loops=2)
           -> Seq Scan on tb (actual rows=0.50 loops=2)
                 Filter: ((id = ANY ('{1}'::integer[])) AND (ta.id = id))
                 Rows Removed by Filter: 2
           -> Seq Scan on tc (actual rows=1.00 loops=1)

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
SELECT 1
   FROM ta
WHERE EXISTS (SELECT 1
                  FROM tb
                  JOIN tc
                    ON ta.id = tb.id
                   AND tb.is_active
               );
                       QUERY PLAN
---------------------------------------------------------
Seq Scan on ta (actual rows=2.00 loops=1)
   Filter: EXISTS(SubPlan 1)
   SubPlan 1
     -> Nested Loop (actual rows=1.00 loops=2)
           -> Seq Scan on tb (actual rows=1.00 loops=2)
                 Filter: (is_active AND (ta.id = id))
                 Rows Removed by Filter: 0
           -> Seq Scan on tc (actual rows=1.00 loops=2)

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
SELECT 1
   FROM ta
WHERE EXISTS (SELECT 1
                  FROM tb
                  JOIN tc
                    ON ta.id = tb.id
                   AND tb.is_active IS NOT NULL
               );

                                   QUERY PLAN
--------------------------------------------------------------------
Seq Scan on ta (actual rows=2.00 loops=1)
   Filter: EXISTS(SubPlan 1)
    SubPlan 1
       -> Nested Loop (actual rows=1.00 loops=2)
             -> Seq Scan on tb (actual rows=1.00 loops=2)
                   Filter: ((is_active IS NOT NULL) AND (ta.id = id))
                   Rows Removed by Filter: 0
             -> Seq Scan on tc (actual rows=1.00 loops=2)

UPDATE tb SET is_active = NULL WHERE id = 2;

EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF)
SELECT 1
   FROM ta
WHERE EXISTS (SELECT 1
                  FROM tb
                  JOIN tc
                    ON ta.id = tb.id
                   AND tb.is_active IS NULL
               );
                              QUERY PLAN
----------------------------------------------------------------
Seq Scan on ta (actual rows=1.00 loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 1
   SubPlan 1
     -> Nested Loop (actual rows=0.50 loops=2)
           -> Seq Scan on tb (actual rows=0.50 loops=2)
                 Filter: ((is_active IS NULL) AND (ta.id = id))
                 Rows Removed by Filter: 4
           -> Seq Scan on tc (actual rows=1.00 loops=1)

I see that I need to add a walker that, when traversing the tree,
determines whether there are conditions under which pull-up is
impossible (the presence of volatile functions and other
restrictions), while keeping the
transformation for the Var nodes that I added before, as I described
here.

Unfortunately, I need a few days to implement this, and the result will
need time for review. I do not think I can finish before the code
freeze, so
I am moving this patch to the next commitfest and leaving its status as
"awaiting the author".

On 11.02.2025 18:59, Alena Rybakina wrote:

On 10.02.2025 23:51, Ilia Evdokimov wrote:

On 09.02.2025 18:14, Alena Rybakina wrote:

Hi! I found another example where the transformation worked
incorrectly and reconsidered the idea.

As for conversion of exists_sublink_to_ANY, we need to get the
flattened implicit-AND list of clauses and pull out the chunks of
the WHERE clause that belong to the parent query,
since we are called halfway through the parent's
preprocess_expression() and earlier steps of preprocess_expression()
wouldn't get applied to the pulled-up stuff unless we do them here.
We also process Vars differently depending on which side the Var
is on: if it is in the subquery, we only need to lower its level
(varlevel) because the subquery will be flattened, while
for Vars that belong to the parent query, we need to prepare for
pulling up the sub-select into the top range table.

For those expressions that we couldn't assign to either list, we
define newWhere and apply both cases.

When I run 'make -C contrib/ check', tests of postgres_fdw extension
failed. I might be wrong, but you should be careful with LIMIT.

Thank you for the review, I'm working on it.

Sorry for not responding, but I will fix this bug after I update the
code based on the comments above. Thank you for noticing and writing to
me, your feedback is very important.

--
Regards,
Alena Rybakina
Postgres Professional

#11

Ilia Evdokimov

ilya.evdokimov@tantorlabs.com

9 months ago

In reply to: Alena Rybakina (#10)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

On 02.04.2025 19:39, Alena Rybakina wrote:

I see that I need to add a walker that, when traversing the tree,
determines whether there are conditions under which pull-up is
impossible (the presence of volatile functions and other
restrictions), while keeping the
transformation for the Var nodes that I added before, as I described
here.

I have some concerns about pulling up every clause from the subquery
with one column. In particular, not every clause is safe or beneficial
to pull up: OR-clauses, CASE expressions, nested sublinks could
significantly change how the planner estimates the number of rows or
applies filters, especially when they are not true join predicates.
Pulling them up might lead to worse plans, or even change the semantics
in subtle ways. I think before applying such transformations, we should
make sure they are not only safe but actually improve the resulting plan.

#12

Alena Rybakina

a.rybakina@postgrespro.ru

7 months ago

In reply to: Ilia Evdokimov (#11)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi, all! I updated the patch and it looks nice. All the problems have
been solved.

On 03.04.2025 16:56, Ilia Evdokimov wrote:

On 02.04.2025 19:39, Alena Rybakina wrote:

I see that I need to add a walker that, when traversing the tree,
determines whether there are conditions under which pull-up is
impossible (the presence of volatile functions and other
restrictions), while keeping the
transformation for the Var nodes that I added before, as I described
here.

I have some concerns about pulling up every clause from the subquery
with one column. In particular, not every clause is safe or beneficial
to pull up: OR-clauses, CASE expressions, nested sublinks could
significantly change how the planner estimates the number of rows or
applies filters, especially when they are not true join predicates.
Pulling them up might lead to worse plans, or even change the
semantics in subtle ways. I think before applying such
transformations, we should make sure they are not only safe but
actually improve the resulting plan.

There may indeed be cases where a query plan with pull-up is worse
than one without pull-up.

For example, as shown below, without pull-up we don't need to scan two
tables and perform a join, since the subquery returns 0 rows (no
matching tuples in the inner sequential scan in a parameterized Nested
Loop).
However, this cannot be detected at the current planning stage - we
simply don't have that information yet.

Do you have any ideas on how to solve this problem? So far, the only
approach I see is to try an alternative plan but I'm still learning this.

For example:

create table t(x int);
create table t1(x int);
create table t2(x int);

insert into t2 select id from generate_series(20001,30000) as id;
insert into t1 select id from generate_series(10001,20000) as id;
insert into t select id from generate_series(1,10000) as id;
vacuum analyze;
explain analyze select * from t where exists (select * from t1 join t2
on t.x = t1.x);

with my patch:
                                                               QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------
Hash Join (cost=1500540.00..1500822.50 rows=10000 width=4) (actual
time=70694.658..70694.662 rows=0.00 loops=1)
   Hash Cond: (t.x = t1.x)
   Buffers: shared hit=135
*->* *Seq Scan on t* (cost=0.00..145.00 rows=10000 width=4) (actual
time=0.009..1.545 *rows=10000.00* loops=1)
         Buffers: shared hit=45
   -> Hash (cost=1500415.00..1500415.00 rows=10000 width=4) (actual
time=70690.524..70690.526 rows=10000.00 loops=1)
         Buckets: 16384 Batches: 1 Memory Usage: 480kB
         Buffers: shared hit=90
         -> HashAggregate (cost=1500315.00..1500415.00 rows=10000
width=4) (actual time=70683.143..70686.590 rows=10000.00 loops=1)
               Group Key: t1.x
               Batches: 1 Memory Usage: 793kB
               Buffers: shared hit=90
*->* *Nested Loop* (cost=0.00..1250315.00 rows=100000000 width=4)
(actual time=0.019..25650.447 *rows=100000000.00* loops=1)
                     Buffers: shared hit=90
*-> Seq Scan on t1* (cost=0.00..145.00 rows=10000 width=4) (actual
time=0.006..4.931 *rows=10000.00* loops=1)
                           Buffers: shared hit=45
                     -> Materialize (cost=0.00..195.00 rows=10000
width=0) (actual time=0.000..0.875 rows=10000.00 loops=10000)
                           Storage: Memory Maximum Storage: 519kB
                           Buffers: shared hit=45
                           -> Seq Scan on t2 (cost=0.00..145.00
rows=10000 width=0) (actual time=0.007..1.246 rows=10000.00 loops=1)
                                 Buffers: shared hit=45
Planning:
   Buffers: shared hit=36 read=3
Planning Time: 0.375 ms
*Execution Time: 70695.154 ms*

without my patch:

                                                        QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
*Seq Scan on t* (cost=0.00..309.30 rows=5738 width=4) (actual
time=68268.562..68268.565 *rows=0.00* loops=1)
   Filter: EXISTS(SubPlan 1)
   Rows Removed by Filter: 10000
   Buffers: shared hit=900045
   SubPlan 1
     -> *Nested Loop* (cost=0.00..8524.27 rows=654075 width=0) (actual
time=6.823..6.823 *rows=0.00* loops=10000)
           Buffers: shared hit=900000
           -> Seq Scan on t2 (cost=0.00..159.75 rows=11475 width=0)
(actual time=0.011..1.660 rows=10000.00 loops=10000)
                 Buffers: shared hit=450000
           -> Materialize (cost=0.00..188.72 rows=57 width=0) (actual
time=0.000..0.000 rows=0.00 loops=100000000)
                 Storage: Memory Maximum Storage: 17kB
                 Buffers: shared hit=450000
                 -> Seq Scan on t1 (cost=0.00..188.44 rows=57
width=0) (actual time=2.403..2.403 *rows=0.00* loops=10000)
                       Filter: (t.x = x)
                       Rows Removed by Filter: 10000
                       Buffers: shared hit=450000
Planning:
   Buffers: shared hit=40 read=16
Planning Time: 0.487 ms
Execution Time: *68268.600 ms*

--
Regards,
Alena Rybakina
Postgres Professional

Attachments:

v4-0001-Teach-the-planner-to-convert-EXISTS-and-NOT-EXISTS-s.patchtext/x-patch; charset=UTF-8; name=v4-0001-Teach-the-planner-to-convert-EXISTS-and-NOT-EXISTS-s.patchDownload

From 947b5251bc2afb2dfb225316b796ae1e0915d957 Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Tue, 3 Jun 2025 18:44:11 +0300
Subject: [PATCH] Teach the planner to convert EXISTS and NOT EXISTS subqueries
 into semi and anti joins. To do this, we put all potential expressions from
 the qual list and join list into the common list and check each expression
 one by one to see if they are suitable for transformation.

Authors: Alena Rybakina <lena.ribackina@yandex.ru>
Reviewed-by: Ranier Vilela <ranier.vf@gmail.com>, Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>,
	     Peter Petrov <p.petrov@postgrespro.ru>
---
 src/backend/optimizer/plan/subselect.c  | 113 ++++-
 src/test/regress/expected/subselect.out | 637 ++++++++++++++++++++++++
 src/test/regress/sql/subselect.sql      | 367 ++++++++++++++
 3 files changed, 1095 insertions(+), 22 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index e7cb3fede66..d6d35d3430d 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1458,6 +1458,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	ListCell *lc;
+	List *newWhere = NIL;
+	List *all_clauses = NIL;
+	bool upper_reference_exists = false;
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1487,32 +1491,71 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
+	if (subselect->jointree->quals)
+		all_clauses = lappend(all_clauses, subselect->jointree->quals);
+
 	subselect->jointree->quals = NULL;
 
-	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
+	/* Gather all clauses in main list for the further consideration */
+	all_clauses = list_concat(all_clauses, subselect->jointree->fromlist);
 
 	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
+	 * We will able to remove top-level plain JOIN/ON clauses if they are not outer join.
 	 */
-	if (!contain_vars_of_level(whereClause, 1))
-		return NULL;
+	foreach (lc, all_clauses)
+	{
+		Node *je = ((Node *) lfirst(lc));
+
+		whereClause = copyObject(je);
+
+		if (IsA(whereClause, RangeTblRef))
+			continue;
+
+		if (IsA(whereClause, JoinExpr))
+		{
+			if (((JoinExpr *) whereClause)->jointype != JOIN_INNER)
+			{
+				/*
+	 			 * Clauses must not refer to any Vars of the parent
+	 			 * query.  (Vars of higher levels should be okay, though.)
+	 			 */
+				if (contain_vars_of_level(whereClause, 1))
+					return NULL;
+				else
+					continue;
+			}
+			else if (((JoinExpr *) whereClause)->quals != NULL)
+				whereClause = ((JoinExpr *) whereClause)->quals;
+		}
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+			return NULL;
+
+		/*
+		* Clean up the WHERE clause by doing const-simplification etc on it.
+		*/
+		whereClause = eval_const_expressions(root, whereClause);
+		whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
+
+		if(!IsA(whereClause, JoinExpr))
+			newWhere = lappend(newWhere, whereClause);
+		else
+			return NULL;
+
+		if (contain_vars_of_level((Node *) whereClause, 1))
+			upper_reference_exists = true;
+	}
+
+	list_free(all_clauses);
 
 	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
+	 * There are no WHERE clause containing some Vars of the
+	 * parent query, so it's not gonna be a join.
 	 */
-	if (contain_volatile_functions(whereClause))
+	if(!upper_reference_exists)
 		return NULL;
 
 	/*
@@ -1537,7 +1580,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 */
 	rtoffset = list_length(parse->rtable);
 	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
 
 	/*
 	 * Upper-level vars in subquery will now be one level closer to their
@@ -1545,7 +1587,9 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * becomes level zero.
 	 */
 	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
+
+	OffsetVarNodes((Node *) newWhere, rtoffset, 0);
+	IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
 
 	/*
 	 * Now that the WHERE clause is adjusted to match the parent query
@@ -1553,7 +1597,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
 	 * not.
 	 */
-	clause_varnos = pull_varnos(root, whereClause);
+	clause_varnos = pull_varnos(root, (Node *) newWhere);
 	upper_varnos = NULL;
 	varno = -1;
 	while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
@@ -1571,6 +1615,31 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!bms_is_subset(upper_varnos, available_rels))
 		return NULL;
 
+	/*
+	 * In case of a successful attempt, replaces it with the correct condition.
+	 * We were sure that inner relations are independent, so we confidently
+	 * can replace their join condition on true.
+	 */
+	foreach(lc, subselect->jointree->fromlist)
+	{
+		Node *node = lfirst(lc);
+
+		if (IsA(node, RangeTblRef))
+			continue;
+
+		if ((IsA(node, JoinExpr) && ((JoinExpr *)node)->jointype != JOIN_INNER))
+			continue;
+
+		if (IsA(node, JoinExpr) && ((JoinExpr *) node)->quals != NULL)
+			((JoinExpr *) node)->quals = (Node *) makeConst(BOOLOID,
+																-1,
+																InvalidOid,
+																sizeof(bool),
+																(Datum) 1,
+																false,
+																true);
+	}
+
 	/*
 	 * Now we can attach the modified subquery rtable to the parent. This also
 	 * adds subquery's RTEPermissionInfos into the upper query.
@@ -1592,7 +1661,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 		result->rarg = (Node *) subselect->jointree;
 	result->usingClause = NIL;
 	result->join_using_alias = NULL;
-	result->quals = whereClause;
+	result->quals = (Node *) make_ands_explicit(newWhere);
 	result->alias = NULL;
 	result->rtindex = 0;		/* we don't need an RTE for it */
 
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 40d8056fcea..965c609b045 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -813,6 +813,643 @@ where exists (
   where road.name = ss.f1 );
 rollback;
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Hash Join
+   Hash Cond: (ta1.id = tb.aval)
+   ->  Seq Scan on ta ta1
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.aval
+               ->  Merge Join
+                     Merge Cond: (tb.aval = tc.aid)
+                     ->  Sort
+                           Sort Key: tb.aval
+                           ->  Seq Scan on tb
+                     ->  Sort
+                           Sort Key: tc.aid
+                           ->  Seq Scan on tc
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Semi Join
+   ->  Hash Join
+         Hash Cond: (ta.id = tb.id)
+         ->  Seq Scan on ta
+         ->  Hash
+               ->  Seq Scan on tb
+   ->  Nested Loop
+         ->  Seq Scan on tb tb1
+         ->  Materialize
+               ->  Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = 1)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = 1)
+         ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (ta.val = 1)
+           ->  Nested Loop Left Join
+                 Join Filter: (ta.id = tc.id)
+                 ->  Seq Scan on tc
+                 ->  Materialize
+                       ->  Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on tb
+                                 Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta ta2
+         Filter: (val = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using ta_pkey on ta
+               Index Cond: (id = ta2.id)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta ta1
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Seq Scan on tb
+         ->  Materialize
+               ->  Seq Scan on ta ta2
+                     Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (ta.id = tc.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (tb.id = tc.id)
+               Join Filter: ((tc.aid + tb.aval) > 0)
+               ->  Seq Scan on tb
+               ->  Hash
+                     ->  Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   Join Filter: EXISTS(SubPlan 1)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+   SubPlan 1
+     ->  Index Scan using tc_pkey on tc
+           Index Cond: (id = tb.id)
+           Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: COALESCE(is_active, true)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ta.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on ta
+                                 Filter: COALESCE(is_active, true)
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: CASE WHEN is_active THEN true ELSE false END
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ta.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on ta
+                                 Filter: CASE WHEN is_active THEN true ELSE false END
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: is_active
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: (is_active IS NOT NULL)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+               QUERY PLAN               
+----------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join
+           Join Filter: (ta.id = tc.id)
+           ->  Seq Scan on tc
+           ->  Materialize
+                 ->  Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Result
+   One-Time Filter: (InitPlan 1).col1
+   InitPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on tb
+           ->  Index Only Scan using tc_pkey on tc
+                 Index Cond: (id = tb.id)
+   ->  Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+                                                                      QUERY PLAN                                                                       
+-------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+   Merge Cond: (t1.id = t2.id)
+   ->  Index Scan using ta_pkey on ta t1
+   ->  Nested Loop Semi Join
+         Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1)))
+         ->  Index Scan using tb_pkey on tb t2
+         ->  Materialize
+               ->  Seq Scan on td t3
+                     Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 2
+           ->  Seq Scan on te t4
+         SubPlan 4
+           ->  Seq Scan on tc t5
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (t2.tst1_id = t1.id)
+   ->  Nested Loop Semi Join
+         Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3))
+         ->  Seq Scan on tst2 t2
+         ->  Materialize
+               ->  Seq Scan on tst3 t3
+                     Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 1
+           ->  Seq Scan on tst4 t4
+                 Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+         SubPlan 3
+           ->  Seq Scan on tst5 t5
+                 Filter: (tst3_id = t3.id)
+   ->  Hash
+         ->  Seq Scan on tst1 t1
+(16 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 select
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index fec38ef85a6..84f0f2101bb 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -440,6 +440,373 @@ where exists (
 rollback;
 
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 
-- 
2.34.1

#13

Ilia Evdokimov

ilya.evdokimov@tantorlabs.com

4 months ago

In reply to: Alena Rybakina (#12)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

On 04.06.2025 13:40, Alena Rybakina wrote:

Do you have any ideas on how to solve this problem? So far, the only
approach I see is to try an alternative plan but I'm still learning this.

Hi,

I've reviewed this patch, and I have a suggestion about the approach.

Currently, the patch extends 'convert_EXISTS_sublink_to_join' with quite
complex logic (clause collection, volatile checks, rewriting join quals,
etc). While it works, the amount of branching and special cases makes
the function harder to follow.

Looking at the logic, it seems that a large part of the complexity comes
from trying to directly adapt 'convert_EXISTS_sublink_to_join' instead
of factoring out a dedicated path. An alternative would be to introduce
a separate function 'convert_EXISTS_sublink_to_lateral_join' - with a
similar API to 'convert_ANY_sublink_to_join'. Such a function can focus
only on the EXISTS-to-join case, while keeping the existing function
shorter and easier to reason about.

I even made some first rough sketches of this approach (not a finished
patch, just an outline). Of course, it would still need proper
adaptation, but I think it demonstrates that the overall structure can
be kept simpler.

What do you think about refactoring in this direction?

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC,
https://tantorlabs.com

Attachments:

convert_EXISTS_sublink_to_lateral_join.patchtext/x-patch; charset=UTF-8; name=convert_EXISTS_sublink_to_lateral_join.patchDownload

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..6dbbc3d9b72 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -104,6 +104,96 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
 static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
 
+/*
+ * convert_EXISTS_sublink_to_lateral_join:
+ *		try to convert an EXISTS SubLink to a lateral join
+ *
+ * Same API as convert_ANY_sublink_to_join, plus "under_not" for the case
+ * where the caller found NOT EXISTS.  Returns NULL if we can't convert, else
+ * a SEMI (or ANTI) JoinExpr whose larg the caller must fill in.
+ */
+JoinExpr *
+convert_EXISTS_sublink_to_lateral_join(PlannerInfo *root, SubLink *sublink,
+									   bool under_not, Relids available_rels)
+{
+	JoinExpr   *result;
+	Query	   *parse = root->parse;
+	Query	   *subselect = (Query *) sublink->subselect;
+	int			rtindex;
+	ParseNamespaceItem *nsitem;
+	RangeTblEntry *rte;
+	RangeTblRef *rtr;
+	ParseState *pstate;
+	Relids		sub_ref_outer_relids;
+	bool		use_lateral;
+
+	Assert(sublink->subLinkType == EXISTS_SUBLINK);
+
+	/*
+	 * See if the subquery can be simplified based on the knowledge that it's
+	 * being used in EXISTS().  If we aren't able to get rid of its
+	 * targetlist, we have to fail, because the pullup operation leaves us
+	 * with noplace to evaluate the targetlist.
+	 */
+	if (!simplify_EXISTS_query(root, subselect))
+		return NULL;
+
+	/*
+	 * If the sub-select contains any Vars of the parent query, we treat it as
+	 * LATERAL.  (Vars from higher levels don't matter here.)
+	 */
+	sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
+	use_lateral = !bms_is_empty(sub_ref_outer_relids);
+
+	/*
+	 * Can't convert if the sub-select contains parent-level Vars of relations
+	 * not in available_rels.
+	 */
+	if (!bms_is_subset(sub_ref_outer_relids, available_rels))
+		return NULL;
+
+	/* Create a dummy ParseState for addRangeTableEntryForSubquery */
+	pstate = make_parsestate(NULL);
+
+	/*
+	 * Okay, pull up the sub-select into upper range table.
+	 *
+	 * We rely here on the assumption that the outer query has no references
+	 * to the inner (necessarily true, since we build no Vars referencing the
+	 * sub-select's output). Therefore this is a lot easier than what
+	 * pull_up_subqueries has to go through.
+	 */
+	nsitem = addRangeTableEntryForSubquery(pstate,
+										   subselect,
+										   makeAlias("EXISTS_subquery", NIL),
+										   use_lateral,
+										   false);
+	rte = nsitem->p_rte;
+	parse->rtable = lappend(parse->rtable, rte);
+	rtindex = list_length(parse->rtable);
+
+	/*
+	 * Form a RangeTblRef for the pulled-up sub-select.
+	 */
+	rtr = makeNode(RangeTblRef);
+	rtr->rtindex = rtindex;
+
+	/*
+	 * And finally, build the JoinExpr node.
+	 */
+	result = makeNode(JoinExpr);
+	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+	result->isNatural = false;
+	result->larg = NULL;		/* caller must fill this in */
+	result->rarg = (Node *) rtr;
+	result->usingClause = NIL;
+	result->join_using_alias = NULL;
+	result->quals = NULL;		/* no join quals are attached here */
+	result->alias = NULL;
+	result->rtindex = 0;		/* we don't need an RTE for it */
+
+	return result;
+}
 
 /*
  * Get the datatype/typmod/collation of the first column of the plan's output.
@@ -1462,6 +1552,9 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
+	return convert_EXISTS_sublink_to_lateral_join(root, sublink, 
+										under_not, available_rels);
+
 	/*
 	 * Can't flatten if it contains WITH.  (We could arrange to pull up the
 	 * WITH into the parent query's cteList, but that risks changing the

#14

Alena Rybakina

a.rybakina@postgrespro.ru

4 months ago

In reply to: Ilia Evdokimov (#13)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi! Thank you for your attention to this patch!

On 03.09.2025 00:07, Ilia Evdokimov wrote:

On 04.06.2025 13:40, Alena Rybakina wrote:

Do you have any ideas on how to solve this problem? So far, the only
approach I see is to try an alternative plan but I'm still learning this.

Hi,

I've reviewed this patch, and I have a suggestion about the approach.

Currently, the patch extends 'convert_EXISTS_sublink_to_join' with
quite complex logic (clause collection, volatile checks, rewriting
join quals, etc). While it works, the amount of branching and special
cases makes the function harder to follow.

Looking at the logic, it seems that a large part of the complexity
comes from trying to directly adapt 'convert_EXISTS_sublink_to_join'
instead of factoring out a dedicated path. An alternative would be to
introduce a separate function
*'convert_EXISTS_sublink_to_lateral_join' *- with a similar API to
'convert_ANY_sublink_to_join'. Such a function can focus only on the
EXISTS-to-join case, while keeping the existing function shorter and
easier to reason about.

I even made some first rough sketches of this approach (not a finished
patch, just an outline). Of course, it would still need proper
adaptation, but I think it demonstrates that the overall structure can
be kept simpler.

What do you think about refactoring in this direction?

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC,
https://tantorlabs.com

I'll look at this. I need some time to consider it.

I'm working on this approach right now. I introduced a mutator that performs
the transformation where possible, but I still need to fix some bugs.

Attachments:

pull_up.difftext/x-patch; charset=UTF-8; name=pull_up.diffDownload

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..0d2820ca79f 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1439,6 +1439,239 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	return result;
 }
 
+typedef struct HoistJoinQualsContext	/* state threaded through hoist_parent_quals_jointree_mutator */
+{
+	List *outer_clauses;   /* clauses hoisted out of the subquery jointree, in hoisting order */
+	Relids observed_nulltest_vars;	/* varnos of parent-level Vars already given an IS NOT NULL test */
+} HoistJoinQualsContext;
+
+static Node *
+preprocess_quals(Node *node)
+{
+	Expr	   *folded;
+
+	/*
+	 * Constant-fold with a NULL PlannerInfo on purpose.  These quals belong
+	 * to a subquery whose rangetable has not yet been merged into the
+	 * parent's, so a non-NULL root would let eval_const_expressions()
+	 * apply root-dependent simplifications (such as folding a NullTest on
+	 * a Var) against the wrong rangetable.
+	 */
+	folded = (Expr *) eval_const_expressions(NULL, node);
+
+	/*
+	 * Canonicalize the folded qual, then flatten its top-level AND into an
+	 * implicit-AND list.  The List is returned cast to Node *, so callers
+	 * must cast it back before iterating.
+	 */
+	return (Node *) make_ands_implicit(canonicalize_qual(folded, false));
+}
+
+static NullTest *
+make_nulltest(Var *var, NullTestType type)
+{
+	/* Scalar (non-row) null test of the given kind over "var"; location unset. */
+	NullTest   *nt = makeNode(NullTest);
+	nt->location = -1;
+	nt->argisrow = false;
+	nt->nulltesttype = type;
+	nt->arg = (Expr *) var;
+	return nt;
+}
+
+static bool
+simplicity_check_walker(Node *node, void *ctx)
+{
+	/*
+	 * Reports true as soon as a Var is found anywhere beneath "node",
+	 * sub-Queries included; an empty tree reports false.
+	 */
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var))
+		return true;
+
+	if (!IsA(node, Query))
+		return expression_tree_walker(node, simplicity_check_walker, ctx);
+
+	return query_tree_walker((Query *) node, simplicity_check_walker,
+							 ctx, QTW_EXAMINE_RTES_BEFORE);
+}
+
+/*
+ * Return "arg" (looking through one RelabelType) if it is a Var of the
+ * immediate parent query whose varno is not yet in *observed_vars, adding
+ * that varno to the set; otherwise return NULL.
+ */
+static Var *
+claim_unobserved_parent_var(Node *arg, Relids *observed_vars)
+{
+	Var		   *var;
+
+	if (arg == NULL)		/* e.g. absent right operand of a unary operator */
+		return NULL;
+	if (IsA(arg, RelabelType))
+		arg = (Node *) ((RelabelType *) arg)->arg;
+	if (!IsA(arg, Var))
+		return NULL;
+	var = (Var *) arg;
+	if (var->varlevelsup != 1 || bms_is_member(var->varno, *observed_vars))
+		return NULL;
+	*observed_vars = bms_add_member(*observed_vars, var->varno);
+	return var;
+}
+
+/*
+ * Build IS NOT NULL tests for parent-query Vars that are a direct operand
+ * of the OpExprs in list_expr: at most one per clause and per parent varno.
+ */
+static List *
+generate_not_null_exprs(List *list_expr, Relids *observed_vars)
+{
+	List	   *result = NIL;
+	ListCell   *lc;
+
+	foreach(lc, list_expr)
+	{
+		Node	   *node = (Node *) lfirst(lc);
+		Var		   *var;
+
+		if (!IsA(node, OpExpr))
+			continue;
+		var = claim_unobserved_parent_var(get_leftop(node), observed_vars);
+		if (var == NULL)
+			var = claim_unobserved_parent_var(get_rightop(node), observed_vars);
+		if (var != NULL)
+			result = lappend(result, make_nulltest(var, IS_NOT_NULL));
+	}
+
+	return result;
+}
+
+/*
+ * Rebuild a subquery jointree, moving hoistable quals into
+ * context->outer_clauses.  Returns NULL if the tree cannot be handled, in
+ * which case the caller must give up on the pull-up.
+ */
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
+{
+	if (jtnode == NULL)
+		return NULL;
+
+	if (IsA(jtnode, RangeTblRef))
+		return jtnode;  /* nothing to change */
+
+	if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr *j = (JoinExpr *) jtnode;
+		JoinExpr *newj;
+		ListCell *lc;
+		List *join_clauses = NIL;
+
+		/*
+		 * Outer joins are kept exactly as they are.  A qual taken from an
+		 * outer join's ON clause, or from any join nested beneath it, and
+		 * re-applied at the semijoin would discard null-extended rows the
+		 * outer join must preserve.  So the subtree is usable only if it
+		 * neither references the parent query nor has volatile quals.
+		 */
+		if (j->jointype == JOIN_LEFT ||
+			j->jointype == JOIN_RIGHT ||
+			j->jointype == JOIN_FULL)
+		{
+			if (contain_vars_of_level(jtnode, 1) ||
+				contain_volatile_functions(jtnode))
+				return NULL;
+			return jtnode;
+		}
+
+		/* Any other join type: shallow-copy the node, recurse into inputs */
+		newj = makeNode(JoinExpr);
+		memcpy(newj, j, sizeof(JoinExpr));
+		newj->larg = hoist_parent_quals_jointree_mutator(j->larg, context);
+		newj->rarg = hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+		if (newj->larg == NULL || newj->rarg == NULL ||
+			contain_volatile_functions(newj->quals))
+			return NULL;
+
+		/*
+		 * Split the simplified ON clause: an OpExpr with a Var under each
+		 * operand is a candidate join clause, anything else is hoisted.
+		 */
+		foreach(lc, (List *) preprocess_quals(newj->quals))
+		{
+			Node *node = (Node *) lfirst(lc);
+
+			if (IsA(node, OpExpr) &&
+				simplicity_check_walker(get_leftop(node), NULL) &&
+				simplicity_check_walker(get_rightop(node), NULL))
+				join_clauses = lappend(join_clauses, node);
+			else
+				context->outer_clauses = lappend(context->outer_clauses, node);
+		}
+
+		/*
+		 * Candidates that mention the parent query are hoisted too, as a
+		 * group preceded by IS NOT NULL tests on their parent-level Vars.
+		 */
+		if (contain_vars_of_level((Node *) join_clauses, 1))
+		{
+			List *null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
+
+			context->outer_clauses = list_concat(context->outer_clauses, null_tests);
+			context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
+			join_clauses = NIL;
+		}
+
+		/* Whatever remains is the join's new ON clause (TRUE if nothing) */
+		if (join_clauses == NIL)
+			newj->quals = (Node *) makeBoolConst(true, false);
+		else
+			newj->quals = (Node *) make_ands_explicit(join_clauses);
+
+		return (Node *) newj;
+	}
+
+	if (IsA(jtnode, FromExpr))
+	{
+		FromExpr *f = (FromExpr *) jtnode;
+		FromExpr *newf = makeNode(FromExpr);
+		ListCell *lc;
+		List *fromlist = NIL;
+
+		memcpy(newf, f, sizeof(FromExpr));
+
+		/* Rebuild the fromlist; fail as soon as any child fails */
+		foreach(lc, f->fromlist)
+		{
+			Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+			if (fnode == NULL)
+				return NULL;
+			fromlist = lappend(fromlist, fnode);
+		}
+		newf->fromlist = fromlist;
+
+		if (contain_volatile_functions(newf->quals))
+			return NULL;
+
+		/* The whole WHERE clause is hoisted, as an implicit-AND list */
+		if (newf->quals)
+		{
+			context->outer_clauses = list_concat(context->outer_clauses, (List *) preprocess_quals(newf->quals));
+			newf->quals = NULL;
+		}
+
+		return (Node *) newf;
+	}
+
+	return jtnode;  /* quiet compiler */
+}
+
+bool pull_up_with_joins = true;	/* GUC: selects the jointree-hoisting path in convert_EXISTS_sublink_to_join */
+
 /*
  * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
  *
@@ -1453,12 +1686,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	JoinExpr   *result;
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
-	Node	   *whereClause;
+	Node	   *whereClause = NULL;
 	PlannerInfo subroot;
 	int			rtoffset;
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	List 	   *newWhere = NIL;
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1488,145 +1722,269 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
 
-	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
+	if(pull_up_with_joins)
+	{
+		HoistJoinQualsContext hjq_context = {NIL, NULL};
 
-	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
-	 */
-	if (!contain_vars_of_level(whereClause, 1))
-		return NULL;
+		subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
 
-	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
-	 */
-	if (contain_volatile_functions(whereClause))
-		return NULL;
+		if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
+			return NULL;
 
-	/*
-	 * Scan the rangetable for relation RTEs and retrieve the necessary
-	 * catalog information for each relation.  Using this information, clear
-	 * the inh flag for any relation that has no children, collect not-null
-	 * attribute numbers for any relation that has column not-null
-	 * constraints, and expand virtual generated columns for any relation that
-	 * contains them.
-	 *
-	 * Note: we construct up an entirely dummy PlannerInfo for use here.  This
-	 * is fine because only the "glob" and "parse" links will be used in this
-	 * case.
-	 *
-	 * Note: we temporarily assign back the WHERE clause so that any virtual
-	 * generated column references within it can be expanded.  It should be
-	 * separated out again afterward.
-	 */
-	MemSet(&subroot, 0, sizeof(subroot));
-	subroot.type = T_PlannerInfo;
-	subroot.glob = root->glob;
-	subroot.parse = subselect;
-	subselect->jointree->quals = whereClause;
-	subselect = preprocess_relation_rtes(&subroot);
+		newWhere = hjq_context.outer_clauses;
 
-	/*
-	 * Now separate out the WHERE clause again.
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
+		bms_free(hjq_context.observed_nulltest_vars);
 
-	/*
-	 * The subquery must have a nonempty jointree, but we can make it so.
-	 */
-	replace_empty_jointree(subselect);
 
-	/*
-	 * Prepare to pull up the sub-select into top range table.
-	 *
-	 * We rely here on the assumption that the outer query has no references
-	 * to the inner (necessarily true). Therefore this is a lot easier than
-	 * what pull_up_subqueries has to go through.
-	 *
-	 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
-	 * do.  The machinations of simplify_EXISTS_query ensured that there is
-	 * nothing interesting in the subquery except an rtable and jointree, and
-	 * even the jointree FromExpr no longer has quals.  So we can just append
-	 * the rtable to our own and use the FromExpr in our jointree. But first,
-	 * adjust all level-zero varnos in the subquery to account for the rtable
-	 * merger.
-	 */
-	rtoffset = list_length(parse->rtable);
-	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
+		/*
+		* The subquery must have a nonempty jointree, but we can make it so.
+		*/
+		replace_empty_jointree(subselect);
 
-	/*
-	 * Upper-level vars in subquery will now be one level closer to their
-	 * parent than before; in particular, anything that had been level 1
-	 * becomes level zero.
-	 */
-	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
+		/*
+		* Prepare to pull up the sub-select into top range table.
+		*
+		* We rely here on the assumption that the outer query has no references
+		* to the inner (necessarily true). Therefore this is a lot easier than
+		* what pull_up_subqueries has to go through.
+		*
+		* In fact, it's even easier than what convert_ANY_sublink_to_join has to
+		* do.  The machinations of simplify_EXISTS_query ensured that there is
+		* nothing interesting in the subquery except an rtable and jointree, and
+		* even the jointree FromExpr no longer has quals.  So we can just append
+		* the rtable to our own and use the FromExpr in our jointree. But first,
+		* adjust all level-zero varnos in the subquery to account for the rtable
+		* merger.
+		*/
+		rtoffset = list_length(parse->rtable);
+		OffsetVarNodes((Node *) subselect, rtoffset, 0);
 
-	/*
-	 * Now that the WHERE clause is adjusted to match the parent query
-	 * environment, we can easily identify all the level-zero rels it uses.
-	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
-	 * not.
-	 */
-	clause_varnos = pull_varnos(root, whereClause);
-	upper_varnos = NULL;
-	varno = -1;
-	while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
-	{
-		if (varno <= rtoffset)
-			upper_varnos = bms_add_member(upper_varnos, varno);
-	}
-	bms_free(clause_varnos);
-	Assert(!bms_is_empty(upper_varnos));
+		/*
+		* Upper-level vars in subquery will now be one level closer to their
+		* parent than before; in particular, anything that had been level 1
+		* becomes level zero.
+		*/
+		IncrementVarSublevelsUp((Node *) subselect, -1, 1);
 
-	/*
-	 * Now that we've got the set of upper-level varnos, we can make the last
-	 * check: only available_rels can be referenced.
-	 */
-	if (!bms_is_subset(upper_varnos, available_rels))
-		return NULL;
+		OffsetVarNodes((Node *) newWhere, rtoffset, 0);
+		IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
 
-	/*
-	 * Now we can attach the modified subquery rtable to the parent. This also
-	 * adds subquery's RTEPermissionInfos into the upper query.
-	 */
-	CombineRangeTables(&parse->rtable, &parse->rteperminfos,
-					   subselect->rtable, subselect->rteperminfos);
+		/*
+		* Now that the WHERE clause is adjusted to match the parent query
+		* environment, we can easily identify all the level-zero rels it uses.
+		* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+		* not.
+		*/
+		clause_varnos = pull_varnos(root, (Node *) newWhere);
+		upper_varnos = NULL;
+		varno = -1;
+		while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+		{
+			if (varno <= rtoffset)
+				upper_varnos = bms_add_member(upper_varnos, varno);
+		}
+		bms_free(clause_varnos);
 
-	/*
-	 * And finally, build the JoinExpr node.
-	 */
-	result = makeNode(JoinExpr);
-	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
-	result->isNatural = false;
-	result->larg = NULL;		/* caller must fill this in */
-	/* flatten out the FromExpr node if it's useless */
-	if (list_length(subselect->jointree->fromlist) == 1)
-		result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		/*
+		* Now that we've got the set of upper-level varnos, we can make the last
+		* check: only available_rels can be referenced.
+		*/
+		if (!bms_is_empty(upper_varnos) && !bms_is_subset(upper_varnos, available_rels))
+			return NULL;
+
+		/*
+		* Now we can attach the modified subquery rtable to the parent. This also
+		* adds subquery's RTEPermissionInfos into the upper query.
+		*/
+		CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+						subselect->rtable, subselect->rteperminfos);
+
+		/*
+		* And finally, build the JoinExpr node.
+		*/
+		result = makeNode(JoinExpr);
+		result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+		result->isNatural = false;
+		result->larg = NULL;		/* caller must fill this in */
+		/* flatten out the FromExpr node if it's useless */
+		if (list_length(subselect->jointree->fromlist) == 1)
+			result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		else
+			result->rarg = (Node *) subselect->jointree;
+		result->usingClause = NIL;
+		result->join_using_alias = NULL;
+		result->quals = (Node *) make_ands_explicit(newWhere);
+		result->alias = NULL;
+		result->rtindex = 0;		/* we don't need an RTE for it */
+
+		/*
+		* Scan the rangetable for relation RTEs and retrieve the necessary
+		* catalog information for each relation.  Using this information, clear
+		* the inh flag for any relation that has no children, collect not-null
+		* attribute numbers for any relation that has column not-null
+		* constraints, and expand virtual generated columns for any relation that
+		* contains them.
+		*
+		* Note: we construct up an entirely dummy PlannerInfo for use here.  This
+		* is fine because only the "glob" and "parse" links will be used in this
+		* case.
+		*
+		* Note: we temporarily assign back the WHERE clause so that any virtual
+		* generated column references within it can be expanded.  It should be
+		* separated out again afterward.
+		*/
+		MemSet(&subroot, 0, sizeof(subroot));
+		subroot.type = T_PlannerInfo;
+		subroot.glob = root->glob;
+		subroot.parse = subselect;
+		subselect->jointree->quals = result->quals;
+		subselect = preprocess_relation_rtes(&subroot);
+
+		return result;
+	}
 	else
-		result->rarg = (Node *) subselect->jointree;
-	result->usingClause = NIL;
-	result->join_using_alias = NULL;
-	result->quals = whereClause;
-	result->alias = NULL;
-	result->rtindex = 0;		/* we don't need an RTE for it */
+	{
+		/*
+		* Separate out the WHERE clause.  (We could theoretically also remove
+		* top-level plain JOIN/ON clauses, but it's probably not worth the
+		* trouble.)
+		*/
+		whereClause = subselect->jointree->quals;
+		subselect->jointree->quals = NULL;
 
-	return result;
+		/*
+		* The rest of the sub-select must not refer to any Vars of the parent
+		* query.  (Vars of higher levels should be okay, though.)
+		*/
+		if (contain_vars_of_level((Node *) subselect, 1))
+			return NULL;
+
+		/*
+		* On the other hand, the WHERE clause must contain some Vars of the
+		* parent query, else it's not gonna be a join.
+		*/
+		if (!contain_vars_of_level(whereClause, 1))
+			return NULL;
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+			return NULL;
+
+		/*
+		* Scan the rangetable for relation RTEs and retrieve the necessary
+		* catalog information for each relation.  Using this information, clear
+		* the inh flag for any relation that has no children, collect not-null
+		* attribute numbers for any relation that has column not-null
+		* constraints, and expand virtual generated columns for any relation that
+		* contains them.
+		*
+		* Note: we construct up an entirely dummy PlannerInfo for use here.  This
+		* is fine because only the "glob" and "parse" links will be used in this
+		* case.
+		*
+		* Note: we temporarily assign back the WHERE clause so that any virtual
+		* generated column references within it can be expanded.  It should be
+		* separated out again afterward.
+		*/
+		MemSet(&subroot, 0, sizeof(subroot));
+		subroot.type = T_PlannerInfo;
+		subroot.glob = root->glob;
+		subroot.parse = subselect;
+		subselect->jointree->quals = whereClause;
+		subselect = preprocess_relation_rtes(&subroot);
+
+		/*
+		* Now separate out the WHERE clause again.
+		*/
+		whereClause = subselect->jointree->quals;
+		subselect->jointree->quals = NULL;
+
+		/*
+		* The subquery must have a nonempty jointree, but we can make it so.
+		*/
+		replace_empty_jointree(subselect);
+
+		/*
+		* Prepare to pull up the sub-select into top range table.
+		*
+		* We rely here on the assumption that the outer query has no references
+		* to the inner (necessarily true). Therefore this is a lot easier than
+		* what pull_up_subqueries has to go through.
+		*
+		* In fact, it's even easier than what convert_ANY_sublink_to_join has to
+		* do.  The machinations of simplify_EXISTS_query ensured that there is
+		* nothing interesting in the subquery except an rtable and jointree, and
+		* even the jointree FromExpr no longer has quals.  So we can just append
+		* the rtable to our own and use the FromExpr in our jointree. But first,
+		* adjust all level-zero varnos in the subquery to account for the rtable
+		* merger.
+		*/
+		rtoffset = list_length(parse->rtable);
+		OffsetVarNodes((Node *) subselect, rtoffset, 0);
+		OffsetVarNodes(whereClause, rtoffset, 0);
+
+		/*
+		* Upper-level vars in subquery will now be one level closer to their
+		* parent than before; in particular, anything that had been level 1
+		* becomes level zero.
+		*/
+		IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+		IncrementVarSublevelsUp(whereClause, -1, 1);
+
+		/*
+		* Now that the WHERE clause is adjusted to match the parent query
+		* environment, we can easily identify all the level-zero rels it uses.
+		* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+		* not.
+		*/
+		clause_varnos = pull_varnos(root, whereClause);
+		upper_varnos = NULL;
+		varno = -1;
+		while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+		{
+			if (varno <= rtoffset)
+				upper_varnos = bms_add_member(upper_varnos, varno);
+		}
+		bms_free(clause_varnos);
+		Assert(!bms_is_empty(upper_varnos));
+
+		/*
+		* Now that we've got the set of upper-level varnos, we can make the last
+		* check: only available_rels can be referenced.
+		*/
+		if (!bms_is_subset(upper_varnos, available_rels))
+			return NULL;
+
+		/*
+		* Now we can attach the modified subquery rtable to the parent. This also
+		* adds subquery's RTEPermissionInfos into the upper query.
+		*/
+		CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+						subselect->rtable, subselect->rteperminfos);
+
+		/*
+		* And finally, build the JoinExpr node.
+		*/
+		result = makeNode(JoinExpr);
+		result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+		result->isNatural = false;
+		result->larg = NULL;		/* caller must fill this in */
+		/* flatten out the FromExpr node if it's useless */
+		if (list_length(subselect->jointree->fromlist) == 1)
+			result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		else
+			result->rarg = (Node *) subselect->jointree;
+		result->usingClause = NIL;
+		result->join_using_alias = NULL;
+		result->quals = whereClause;
+		result->alias = NULL;
+		result->rtindex = 0;		/* we don't need an RTE for it */
+
+		return result;
+	}
 }
 
 /*
@@ -2959,7 +3317,7 @@ finalize_plan(PlannerInfo *root, Plan *plan,
 			break;
 
 		default:
-			elog(ERROR, "unrecognized node type: %d",
+			elog(ERROR, "unrecognized node type: %d",
 				 (int) nodeTag(plan));
 	}
 
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f137129209f..0a6ea94c320 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1060,6 +1060,17 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"pull_up_with_joins", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner's use of exists pull-up with join expressions."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&pull_up_with_joins,
+		true,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..c43f2a1a2d2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -427,6 +427,7 @@
 #enable_tidscan = on
 #enable_group_by_reordering = on
 #enable_distinct_reordering = on
+#pull_up_with_joins = on
 #enable_self_join_elimination = on
 
 # - Planner Cost Constants -
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 37bc13c2cbd..fa8db1362c7 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -112,6 +112,7 @@ typedef enum
 extern PGDLLIMPORT int debug_parallel_query;
 extern PGDLLIMPORT bool parallel_leader_participation;
 extern PGDLLIMPORT bool enable_distinct_reordering;
+extern PGDLLIMPORT bool pull_up_with_joins;
 
 extern struct PlannedStmt *planner(Query *parse, const char *query_string,
 								   int cursorOptions,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 0563d0cd5a1..e2d78bc80b2 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1019,6 +1019,802 @@ where exists (
   where road.name = ss.f1 );
 rollback;
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Hash Join
+   Hash Cond: (ta1.id = tb.aval)
+   ->  Seq Scan on ta ta1
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.aval
+               ->  Merge Join
+                     Merge Cond: (tb.aval = tc.aid)
+                     ->  Sort
+                           Sort Key: tb.aval
+                           ->  Seq Scan on tb
+                     ->  Sort
+                           Sort Key: tc.aid
+                           ->  Seq Scan on tc
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Semi Join
+   ->  Hash Join
+         Hash Cond: (ta.id = tb.id)
+         ->  Seq Scan on ta
+         ->  Hash
+               ->  Seq Scan on tb
+   ->  Nested Loop
+         ->  Seq Scan on tb tb1
+         ->  Materialize
+               ->  Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = 1)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = 1)
+         ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (ta.val = 1)
+           ->  Nested Loop Left Join
+                 Join Filter: (ta.id = tc.id)
+                 ->  Seq Scan on tc
+                 ->  Materialize
+                       ->  Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on tb
+                                 Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta ta2
+         Filter: (val = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using ta_pkey on ta
+               Index Cond: (id = ta2.id)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta ta1
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Seq Scan on tb
+         ->  Materialize
+               ->  Seq Scan on ta ta2
+                     Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (ta.id = tc.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (tb.id = tc.id)
+               Join Filter: ((tc.aid + tb.aval) > 0)
+               ->  Seq Scan on tb
+               ->  Hash
+                     ->  Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   Join Filter: EXISTS(SubPlan 1)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+   SubPlan 1
+     ->  Index Scan using tc_pkey on tc
+           Index Cond: (id = tb.id)
+           Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: COALESCE(is_active, true)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ta.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on ta
+                                 Filter: COALESCE(is_active, true)
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: CASE WHEN is_active THEN true ELSE false END
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ta.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on ta
+                                 Filter: CASE WHEN is_active THEN true ELSE false END
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: is_active
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: (is_active IS NOT NULL)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+               QUERY PLAN               
+----------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join
+           Join Filter: (ta.id = tc.id)
+           ->  Seq Scan on tc
+           ->  Materialize
+                 ->  Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Result
+   One-Time Filter: (InitPlan 1).col1
+   InitPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on tb
+           ->  Index Only Scan using tc_pkey on tc
+                 Index Cond: (id = tb.id)
+   ->  Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (tc.id = tb.id)
+   ->  Hash Join
+         Hash Cond: (tc.id = ta.id)
+         ->  Seq Scan on tc
+         ->  Hash
+               ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+                 QUERY PLAN                 
+--------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   Join Filter: EXISTS(SubPlan 1)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  Seq Scan on ta
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (tb.id = ta.id)
+           ->  Seq Scan on tc
+(10 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = td.id)
+   ->  Nested Loop Semi Join
+         ->  Seq Scan on ta
+         ->  Nested Loop
+               ->  Index Only Scan using tc_pkey on tc
+                     Index Cond: (id = ta.id)
+               ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: td.id
+               ->  Seq Scan on td
+(12 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         Join Filter: EXISTS(SubPlan 1)
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+         SubPlan 1
+           ->  Result
+                 One-Time Filter: (tb.id = ta.id)
+                 ->  Seq Scan on td
+(11 rows)
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+                                                                      QUERY PLAN                                                                       
+-------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+   Merge Cond: (t1.id = t2.id)
+   ->  Index Scan using ta_pkey on ta t1
+   ->  Nested Loop Semi Join
+         Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1)))
+         ->  Index Scan using tb_pkey on tb t2
+         ->  Materialize
+               ->  Seq Scan on td t3
+                     Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 2
+           ->  Seq Scan on te t4
+         SubPlan 4
+           ->  Seq Scan on tc t5
+(13 rows)
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Hash Right Semi Join  (cost=181.20..219.35 rows=1100 width=9)
+   Hash Cond: (td.id = ta.id)
+   ->  Hash Join  (cost=121.70..150.02 rows=1200 width=12)
+         Hash Cond: (td.id = tc.id)
+         ->  Hash Join  (cost=60.85..86.01 rows=1200 width=8)
+               Hash Cond: (td.id = tb.id)
+               ->  Seq Scan on td  (cost=0.00..22.00 rows=1200 width=4)
+               ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+                     ->  Seq Scan on tb  (cost=0.00..32.60 rows=2260 width=4)
+         ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+               ->  Seq Scan on tc  (cost=0.00..32.60 rows=2260 width=4)
+   ->  Hash  (cost=32.00..32.00 rows=2200 width=9)
+         ->  Seq Scan on ta  (cost=0.00..32.00 rows=2200 width=9)
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (t2.tst1_id = t1.id)
+   ->  Nested Loop Semi Join
+         Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3))
+         ->  Seq Scan on tst2 t2
+         ->  Materialize
+               ->  Seq Scan on tst3 t3
+                     Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 1
+           ->  Seq Scan on tst4 t4
+                 Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+         SubPlan 3
+           ->  Seq Scan on tst5 t5
+                 Filter: (tst3_id = t3.id)
+   ->  Hash
+         ->  Seq Scan on tst1 t1
+               Filter: (id IS NOT NULL)
+(17 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 select
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..d0762c1299e 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
          One-Time Filter: ((InitPlan 1).col1 IS NOT TRUE)
  
  Update on base_tbl
-   InitPlan 1
-     ->  Index Only Scan using base_tbl_pkey on base_tbl t
-           Index Cond: (id = 2)
-   ->  Result
-         One-Time Filter: (InitPlan 1).col1
+   ->  Nested Loop Semi Join
          ->  Index Scan using base_tbl_pkey on base_tbl
                Index Cond: (id = 2)
-(15 rows)
+         ->  Index Scan using base_tbl_pkey on base_tbl t
+               Index Cond: (id = 2)
+(13 rows)
 
 INSERT INTO rw_view1 VALUES (2, 'New row 2');
 SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index a6d276a115b..ca31e47c973 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -507,6 +507,449 @@ where exists (
 rollback;
 
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --

#15

Alena Rybakina

a.rybakina@postgrespro.ru

4 months ago

In reply to: Alena Rybakina (#14)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi, I rebased the patch and fixed the earlier problem.

Show quoted text

On 03.09.2025 00:20, Alena Rybakina wrote:

I'll look at this. I need some time to consider it.

I'm working on this approach right now. I introduced a mutator that
performs the transformation where possible, but I still need to fix some bugs.

Attachments:

v5-0001-Enables-pull-up-of-EXISTS-subqueries-tha.patchtext/x-patch; charset=UTF-8; name=v5-0001-Enables-pull-up-of-EXISTS-subqueries-tha.patchDownload

From bea0cf815e2ac283a973deb31b2ca67bd66d7f8a Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Fri, 5 Sep 2025 14:01:35 +0300
Subject: [PATCH] Enables pull-up of EXISTS subqueries that
 contain INNER joins, unlocking join reordering and earlier filtering. OUTER
 joins with outer references are safely excluded to preserve null-preserving
 semantics.

To achieve this, introduce a mutator that performs a single conservative
pass over the subquery jointree and gives up on the transformation if the
subquery contains volatile quals or OUTER joins with outer references,
since hoisting those would break null-preserving behavior.

OUTER joins without such references, on the other hand, remain intact.
Add IS NOT NULL guards on hoisted outer Vars so that outer rows with a NULL
join key, which cannot produce a match anyway, are rejected early.
Replace the hoisted subquery quals with constant true.
---
 src/backend/optimizer/plan/subselect.c        | 611 +++++++++++---
 src/backend/utils/misc/guc_tables.c           |  11 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/optimizer/optimizer.h             |   1 +
 src/test/regress/expected/subselect.out       | 791 ++++++++++++++++++
 src/test/regress/expected/updatable_views.out |  10 +-
 src/test/regress/sql/subselect.sql            | 443 ++++++++++
 7 files changed, 1736 insertions(+), 132 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..33fc19f7abc 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1439,6 +1439,239 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	return result;
 }
 
+typedef struct HoistJoinQualsContext
+{
+	List *outer_clauses;   /* clauses hoisted out of the subquery jointree so far */
+	Relids observed_nulltest_vars;   /* varnos of level-1 Vars already given an IS NOT NULL guard */
+} HoistJoinQualsContext;
+
+/*
+ * preprocess_quals
+ *	  Const-fold and canonicalize a qual tree, then return it as an
+ *	  implicit-AND list of clauses.
+ *
+ * eval_const_expressions() is deliberately called with a NULL PlannerInfo:
+ * these are a *subquery's* quals, simplified before its rtable has been
+ * merged into the parent.  A non-NULL root could trigger root-dependent
+ * transforms (e.g. folding a NullTest on a Var via var_is_nonnullable)
+ * against the *wrong* rangetable, risking out-of-bounds RTE access.
+ * eval_const_expressions()'s contract allows a NULL root for such uses.
+ */
+static Node *
+preprocess_quals(Node *node)
+{
+	Expr	   *simplified;
+
+	simplified = (Expr *) eval_const_expressions(NULL, node);
+	simplified = canonicalize_qual(simplified, false);
+	return (Node *) make_ands_implicit(simplified);
+}
+
+/* Build a NullTest of the requested type ("var IS [NOT] NULL") over "var". */
+static NullTest *
+make_nulltest(Var *var, NullTestType type)
+{
+	NullTest   *ntest = makeNode(NullTest);
+	ntest->nulltesttype = type;
+	ntest->arg = (Expr *) var;
+	ntest->location = -1;
+	ntest->argisrow = false;
+	return ntest;
+}
+
+/*
+ * simplicity_check_walker
+ *	  Tree walker that reports true as soon as it finds any Var in "node",
+ *	  descending into sub-Queries as well.  "ctx" is only passed through.
+ */
+static bool
+simplicity_check_walker(Node *node, void *ctx)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Var))
+		return true;
+	if (IsA(node, Query))
+		return query_tree_walker((Query *) node, simplicity_check_walker,
+								 ctx, QTW_EXAMINE_RTES_BEFORE);
+
+	return expression_tree_walker(node, simplicity_check_walker, ctx);
+}
+
+/*
+ * generate_not_null_exprs
+ *	  For each OpExpr in "list_expr", emit at most one "var IS NOT NULL" test
+ *	  on a parent-query Var (varlevelsup == 1) used directly as an operand,
+ *	  looking through RelabelType; the left operand is tried first.
+ *
+ * Vars whose varno is already in *observed_vars are skipped, and every varno
+ * that receives a test is added to that set.  Returns the new NullTests.
+ *
+ * NOTE(review): operator strictness is not checked here, and dedup is per
+ * varno rather than per column -- confirm that both are intended.
+ */
+static List *
+generate_not_null_exprs(List *list_expr, Relids *observed_vars)
+{
+	List	   *guards = NIL;
+	ListCell   *cell;
+
+	foreach(cell, list_expr)
+	{
+		Node	   *clause = (Node *) lfirst(cell);
+		Node	   *operands[2];
+		int			i;
+
+		if (!IsA(clause, OpExpr))
+			continue;
+
+		operands[0] = get_leftop(clause);
+		operands[1] = get_rightop(clause);
+		for (i = 0; i < 2; i++)
+			if (IsA(operands[i], RelabelType))
+				operands[i] = (Node *) ((RelabelType *) operands[i])->arg;
+
+		for (i = 0; i < 2; i++)
+		{
+			Var		   *var = (Var *) operands[i];
+
+			if (!IsA(var, Var) || var->varlevelsup != 1 ||
+				bms_is_member(var->varno, *observed_vars))
+				continue;
+
+			guards = lappend(guards, make_nulltest(var, IS_NOT_NULL));
+			*observed_vars = bms_add_member(*observed_vars, var->varno);
+			break;				/* one guard per clause */
+		}
+	}
+
+	return guards;
+}
+
+/*
+ * hoist_parent_quals_jointree_mutator
+ *	  Copy the subquery jointree, moving quals into context->outer_clauses.
+ *
+ * Returns NULL to abandon the pull-up: some JOIN/ON or WHERE clause contains
+ * volatile functions, or an outer join's ON clause references level-1 Vars.
+ *
+ * Inner-join ON clauses are split into "join clauses" (OpExprs with a Var in
+ * each operand) and the rest.  The rest is always hoisted; join clauses are
+ * hoisted, with IS NOT NULL guards, only if they contain level-1 Vars.  A
+ * hoisted ON clause is replaced by constant true.  WHERE clauses are hoisted
+ * as a whole.  Outer-join ON clauses are never hoisted, not even partially.
+ */
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
+{
+	if (jtnode == NULL)
+		return NULL;
+
+	if (IsA(jtnode, RangeTblRef))
+		return jtnode;  /* nothing to change */
+
+	if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr *j = (JoinExpr *) jtnode;
+		JoinExpr *newj = makeNode(JoinExpr);
+		List *join_clauses = NIL;
+		List *quals;
+		ListCell *lc;
+
+		memcpy(newj, j, sizeof(JoinExpr));
+
+		/* Recurse into join inputs; a NULL result means a child gave up */
+		newj->larg = hoist_parent_quals_jointree_mutator(j->larg, context);
+		newj->rarg = hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+		if (newj->larg == NULL || newj->rarg == NULL ||
+			contain_volatile_functions(newj->quals))
+			return NULL;
+
+		/*
+		 * An outer join's ON clause must stay in place: applied above the
+		 * join it would discard null-extended rows.  Decide this before any
+		 * clause is classified, so nothing leaks into outer_clauses.
+		 * NOTE(review): inner joins below an outer join are still hoisted
+		 * from by the recursion above; confirm that is safe for the nullable side.
+		 */
+		if (j->jointype == JOIN_LEFT || j->jointype == JOIN_RIGHT ||
+			j->jointype == JOIN_FULL)
+			return contain_vars_of_level(j->quals, 1) ? NULL : (Node *) newj;
+
+		/* Inner join: split the ON clause into join clauses and the rest */
+		quals = (List *) preprocess_quals(newj->quals);
+		foreach(lc, quals)
+		{
+			Node *node = (Node *) lfirst(lc);
+
+			if (IsA(node, OpExpr) &&
+				simplicity_check_walker(get_leftop(node), NULL) &&
+				simplicity_check_walker(get_rightop(node), NULL))
+				join_clauses = lappend(join_clauses, node);
+			else
+				context->outer_clauses = lappend(context->outer_clauses, node);
+		}
+
+		if (join_clauses != NIL && !contain_vars_of_level((Node *) join_clauses, 1))
+		{
+			/* Purely local join clauses stay in the join */
+			newj->quals = (Node *) make_ands_explicit(join_clauses);
+			return (Node *) newj;
+		}
+
+		if (join_clauses != NIL)
+		{
+			/* Correlated join clauses are hoisted behind IS NOT NULL guards */
+			List *null_tests = generate_not_null_exprs(join_clauses,
+													   &context->observed_nulltest_vars);
+
+			context->outer_clauses = list_concat(context->outer_clauses, null_tests);
+			context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
+		}
+		newj->quals = (Node *) makeBoolConst(true, false);
+		return (Node *) newj;
+	}
+
+	if (IsA(jtnode, FromExpr))
+	{
+		FromExpr *f = (FromExpr *) jtnode;
+		FromExpr *newf = makeNode(FromExpr);
+		List *fromlist = NIL;
+		ListCell *lc;
+
+		memcpy(newf, f, sizeof(FromExpr));
+
+		/* Process the children; give up as soon as one of them does */
+		foreach(lc, f->fromlist)
+		{
+			Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+			if (fnode == NULL)
+				return NULL;
+			fromlist = lappend(fromlist, fnode);
+		}
+		newf->fromlist = fromlist;
+
+		if (contain_volatile_functions(newf->quals))
+			return NULL;
+
+		/* Hoist the whole WHERE clause; it may still contain sublinks etc */
+		if (newf->quals)
+		{
+			List *where_quals = (List *) preprocess_quals(newf->quals);
+
+			context->outer_clauses = list_concat(context->outer_clauses, where_quals);
+			newf->quals = NULL;
+		}
+		return (Node *) newf;
+	}
+
+	return jtnode;  /* quiet compiler */
+}
+
+bool pull_up_with_joins = true;	/* tested by convert_EXISTS_sublink_to_join before it runs the jointree hoisting path */
+
 /*
  * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
  *
@@ -1453,12 +1686,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	JoinExpr   *result;
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
-	Node	   *whereClause;
+	Node	   *whereClause = NULL;
 	PlannerInfo subroot;
 	int			rtoffset;
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	List 	   *newWhere = NIL;
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1488,145 +1722,270 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
 
-	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
+	if(pull_up_with_joins)
+	{
+		HoistJoinQualsContext hjq_context = {NIL, NULL};
 
-	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
-	 */
-	if (!contain_vars_of_level(whereClause, 1))
-		return NULL;
 
-	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
-	 */
-	if (contain_volatile_functions(whereClause))
-		return NULL;
+		/*
+		* Scan the rangetable for relation RTEs and retrieve the necessary
+		* catalog information for each relation.  Using this information, clear
+		* the inh flag for any relation that has no children, collect not-null
+		* attribute numbers for any relation that has column not-null
+		* constraints, and expand virtual generated columns for any relation that
+		* contains them.
+		*
+		* Note: we construct up an entirely dummy PlannerInfo for use here.  This
+		* is fine because only the "glob" and "parse" links will be used in this
+		* case.
+		*
+		* Note: the WHERE clause has not been separated out yet on this path, so
+		* any virtual generated column references within it are expanded here
+		* too; the quals are hoisted out of the jointree afterward.
+		*/
+		MemSet(&subroot, 0, sizeof(subroot));
+		subroot.type = T_PlannerInfo;
+		subroot.glob = root->glob;
+		subroot.parse = subselect;
+		subselect = preprocess_relation_rtes(&subroot);
+
+		subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
+
+		if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
+			return NULL;
 
-	/*
-	 * Scan the rangetable for relation RTEs and retrieve the necessary
-	 * catalog information for each relation.  Using this information, clear
-	 * the inh flag for any relation that has no children, collect not-null
-	 * attribute numbers for any relation that has column not-null
-	 * constraints, and expand virtual generated columns for any relation that
-	 * contains them.
-	 *
-	 * Note: we construct up an entirely dummy PlannerInfo for use here.  This
-	 * is fine because only the "glob" and "parse" links will be used in this
-	 * case.
-	 *
-	 * Note: we temporarily assign back the WHERE clause so that any virtual
-	 * generated column references within it can be expanded.  It should be
-	 * separated out again afterward.
-	 */
-	MemSet(&subroot, 0, sizeof(subroot));
-	subroot.type = T_PlannerInfo;
-	subroot.glob = root->glob;
-	subroot.parse = subselect;
-	subselect->jointree->quals = whereClause;
-	subselect = preprocess_relation_rtes(&subroot);
+		newWhere = hjq_context.outer_clauses;
 
-	/*
-	 * Now separate out the WHERE clause again.
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
+		bms_free(hjq_context.observed_nulltest_vars);
 
-	/*
-	 * The subquery must have a nonempty jointree, but we can make it so.
-	 */
-	replace_empty_jointree(subselect);
+		subselect->jointree->quals = NULL;
 
-	/*
-	 * Prepare to pull up the sub-select into top range table.
-	 *
-	 * We rely here on the assumption that the outer query has no references
-	 * to the inner (necessarily true). Therefore this is a lot easier than
-	 * what pull_up_subqueries has to go through.
-	 *
-	 * In fact, it's even easier than what convert_ANY_sublink_to_join has to
-	 * do.  The machinations of simplify_EXISTS_query ensured that there is
-	 * nothing interesting in the subquery except an rtable and jointree, and
-	 * even the jointree FromExpr no longer has quals.  So we can just append
-	 * the rtable to our own and use the FromExpr in our jointree. But first,
-	 * adjust all level-zero varnos in the subquery to account for the rtable
-	 * merger.
-	 */
-	rtoffset = list_length(parse->rtable);
-	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
+		/*
+		* The subquery must have a nonempty jointree, but we can make it so.
+		*/
+		replace_empty_jointree(subselect);
 
-	/*
-	 * Upper-level vars in subquery will now be one level closer to their
-	 * parent than before; in particular, anything that had been level 1
-	 * becomes level zero.
-	 */
-	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
+		/*
+		* Prepare to pull up the sub-select into top range table.
+		*
+		* We rely here on the assumption that the outer query has no references
+		* to the inner (necessarily true). Therefore this is a lot easier than
+		* what pull_up_subqueries has to go through.
+		*
+		* In fact, it's even easier than what convert_ANY_sublink_to_join has to
+		* do.  The machinations of simplify_EXISTS_query ensured that there is
+		* nothing interesting in the subquery except an rtable and jointree, and
+		* even the jointree FromExpr no longer has quals.  So we can just append
+		* the rtable to our own and use the FromExpr in our jointree. But first,
+		* adjust all level-zero varnos in the subquery to account for the rtable
+		* merger.
+		*/
+		rtoffset = list_length(parse->rtable);
+		OffsetVarNodes((Node *) subselect, rtoffset, 0);
 
-	/*
-	 * Now that the WHERE clause is adjusted to match the parent query
-	 * environment, we can easily identify all the level-zero rels it uses.
-	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
-	 * not.
-	 */
-	clause_varnos = pull_varnos(root, whereClause);
-	upper_varnos = NULL;
-	varno = -1;
-	while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
-	{
-		if (varno <= rtoffset)
-			upper_varnos = bms_add_member(upper_varnos, varno);
-	}
-	bms_free(clause_varnos);
-	Assert(!bms_is_empty(upper_varnos));
+		/*
+		* Upper-level vars in subquery will now be one level closer to their
+		* parent than before; in particular, anything that had been level 1
+		* becomes level zero.
+		*/
+		IncrementVarSublevelsUp((Node *) subselect, -1, 1);
 
-	/*
-	 * Now that we've got the set of upper-level varnos, we can make the last
-	 * check: only available_rels can be referenced.
-	 */
-	if (!bms_is_subset(upper_varnos, available_rels))
-		return NULL;
+		OffsetVarNodes((Node *) newWhere, rtoffset, 0);
+		IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
 
-	/*
-	 * Now we can attach the modified subquery rtable to the parent. This also
-	 * adds subquery's RTEPermissionInfos into the upper query.
-	 */
-	CombineRangeTables(&parse->rtable, &parse->rteperminfos,
-					   subselect->rtable, subselect->rteperminfos);
+		/*
+		* Now that the hoisted clauses are adjusted to match the parent query
+		* environment, we can easily identify all the level-zero rels they use.
+		* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+		* not.
+		*/
+		clause_varnos = pull_varnos(root, (Node *) newWhere);
+		upper_varnos = NULL;
+		varno = -1;
+		while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+		{
+			if (varno <= rtoffset)
+				upper_varnos = bms_add_member(upper_varnos, varno);
+		}
+		bms_free(clause_varnos);
 
-	/*
-	 * And finally, build the JoinExpr node.
-	 */
-	result = makeNode(JoinExpr);
-	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
-	result->isNatural = false;
-	result->larg = NULL;		/* caller must fill this in */
-	/* flatten out the FromExpr node if it's useless */
-	if (list_length(subselect->jointree->fromlist) == 1)
-		result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		/*
+		* Now that we've got the set of upper-level varnos, we can make the last
+		* check: only available_rels can be referenced.
+		*/
+		if (!bms_is_empty(upper_varnos) && !bms_is_subset(upper_varnos, available_rels))
+			return NULL;
+
+		/*
+		* Now we can attach the modified subquery rtable to the parent. This also
+		* adds subquery's RTEPermissionInfos into the upper query.
+		*/
+		CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+						subselect->rtable, subselect->rteperminfos);
+
+		/*
+		* And finally, build the JoinExpr node.
+		*/
+		result = makeNode(JoinExpr);
+		result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+		result->isNatural = false;
+		result->larg = NULL;		/* caller must fill this in */
+		/* flatten out the FromExpr node if it's useless */
+		if (list_length(subselect->jointree->fromlist) == 1)
+			result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		else
+			result->rarg = (Node *) subselect->jointree;
+		result->usingClause = NIL;
+		result->join_using_alias = NULL;
+		result->alias = NULL;
+		result->rtindex = 0;		/* we don't need an RTE for it */
+		result->quals = (Node *) make_ands_explicit(newWhere);
+
+		return result;
+	}
 	else
-		result->rarg = (Node *) subselect->jointree;
-	result->usingClause = NIL;
-	result->join_using_alias = NULL;
-	result->quals = whereClause;
-	result->alias = NULL;
-	result->rtindex = 0;		/* we don't need an RTE for it */
+	{
+		/*
+		* Separate out the WHERE clause.  (We could theoretically also remove
+		* top-level plain JOIN/ON clauses, but it's probably not worth the
+		* trouble.)
+		*/
+		whereClause = subselect->jointree->quals;
+		subselect->jointree->quals = NULL;
 
-	return result;
+		/*
+		* The rest of the sub-select must not refer to any Vars of the parent
+		* query.  (Vars of higher levels should be okay, though.)
+		*/
+		if (contain_vars_of_level((Node *) subselect, 1))
+			return NULL;
+
+		/*
+		* On the other hand, the WHERE clause must contain some Vars of the
+		* parent query, else it's not gonna be a join.
+		*/
+		if (!contain_vars_of_level(whereClause, 1))
+			return NULL;
+
+		/*
+		* We don't risk optimizing if the WHERE clause is volatile, either.
+		*/
+		if (contain_volatile_functions(whereClause))
+			return NULL;
+
+		/*
+		* Scan the rangetable for relation RTEs and retrieve the necessary
+		* catalog information for each relation.  Using this information, clear
+		* the inh flag for any relation that has no children, collect not-null
+		* attribute numbers for any relation that has column not-null
+		* constraints, and expand virtual generated columns for any relation that
+		* contains them.
+		*
+		* Note: we construct up an entirely dummy PlannerInfo for use here.  This
+		* is fine because only the "glob" and "parse" links will be used in this
+		* case.
+		*
+		* Note: we temporarily assign back the WHERE clause so that any virtual
+		* generated column references within it can be expanded.  It should be
+		* separated out again afterward.
+		*/
+		MemSet(&subroot, 0, sizeof(subroot));
+		subroot.type = T_PlannerInfo;
+		subroot.glob = root->glob;
+		subroot.parse = subselect;
+		subselect->jointree->quals = whereClause;
+		subselect = preprocess_relation_rtes(&subroot);
+
+		/*
+		* Now separate out the WHERE clause again.
+		*/
+		whereClause = subselect->jointree->quals;
+		subselect->jointree->quals = NULL;
+
+		/*
+		* The subquery must have a nonempty jointree, but we can make it so.
+		*/
+		replace_empty_jointree(subselect);
+
+		/*
+		* Prepare to pull up the sub-select into top range table.
+		*
+		* We rely here on the assumption that the outer query has no references
+		* to the inner (necessarily true). Therefore this is a lot easier than
+		* what pull_up_subqueries has to go through.
+		*
+		* In fact, it's even easier than what convert_ANY_sublink_to_join has to
+		* do.  The machinations of simplify_EXISTS_query ensured that there is
+		* nothing interesting in the subquery except an rtable and jointree, and
+		* even the jointree FromExpr no longer has quals.  So we can just append
+		* the rtable to our own and use the FromExpr in our jointree. But first,
+		* adjust all level-zero varnos in the subquery to account for the rtable
+		* merger.
+		*/
+		rtoffset = list_length(parse->rtable);
+		OffsetVarNodes((Node *) subselect, rtoffset, 0);
+		OffsetVarNodes(whereClause, rtoffset, 0);
+
+		/*
+		* Upper-level vars in subquery will now be one level closer to their
+		* parent than before; in particular, anything that had been level 1
+		* becomes level zero.
+		*/
+		IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+		IncrementVarSublevelsUp(whereClause, -1, 1);
+
+		/*
+		* Now that the WHERE clause is adjusted to match the parent query
+		* environment, we can easily identify all the level-zero rels it uses.
+		* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+		* not.
+		*/
+		clause_varnos = pull_varnos(root, whereClause);
+		upper_varnos = NULL;
+		varno = -1;
+		while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+		{
+			if (varno <= rtoffset)
+				upper_varnos = bms_add_member(upper_varnos, varno);
+		}
+		bms_free(clause_varnos);
+		Assert(!bms_is_empty(upper_varnos));
+
+		/*
+		* Now that we've got the set of upper-level varnos, we can make the last
+		* check: only available_rels can be referenced.
+		*/
+		if (!bms_is_subset(upper_varnos, available_rels))
+			return NULL;
+
+		/*
+		* Now we can attach the modified subquery rtable to the parent. This also
+		* adds subquery's RTEPermissionInfos into the upper query.
+		*/
+		CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+						subselect->rtable, subselect->rteperminfos);
+
+		/*
+		* And finally, build the JoinExpr node.
+		*/
+		result = makeNode(JoinExpr);
+		result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+		result->isNatural = false;
+		result->larg = NULL;		/* caller must fill this in */
+		/* flatten out the FromExpr node if it's useless */
+		if (list_length(subselect->jointree->fromlist) == 1)
+			result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+		else
+			result->rarg = (Node *) subselect->jointree;
+		result->usingClause = NIL;
+		result->join_using_alias = NULL;
+		result->quals = whereClause;
+		result->alias = NULL;
+		result->rtindex = 0;		/* we don't need an RTE for it */
+
+		return result;
+	}
 }
 
 /*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f137129209f..0a6ea94c320 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1060,6 +1060,17 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"pull_up_with_joins", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner's use of exists pull-up with join expressions."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&pull_up_with_joins,
+		true,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..c43f2a1a2d2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -427,6 +427,7 @@
 #enable_tidscan = on
 #enable_group_by_reordering = on
 #enable_distinct_reordering = on
+#pull_up_with_joins = on
 #enable_self_join_elimination = on
 
 # - Planner Cost Constants -
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 37bc13c2cbd..fa8db1362c7 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -112,6 +112,7 @@ typedef enum
 extern PGDLLIMPORT int debug_parallel_query;
 extern PGDLLIMPORT bool parallel_leader_participation;
 extern PGDLLIMPORT bool enable_distinct_reordering;
+extern PGDLLIMPORT bool pull_up_with_joins;
 
 extern struct PlannedStmt *planner(Query *parse, const char *query_string,
 								   int cursorOptions,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 0563d0cd5a1..236a6bbd086 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1019,6 +1019,797 @@ where exists (
   where road.name = ss.f1 );
 rollback;
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Hash Join
+   Hash Cond: (ta1.id = tb.aval)
+   ->  Seq Scan on ta ta1
+   ->  Hash
+         ->  Unique
+               ->  Merge Join
+                     Merge Cond: (tb.aval = tc.aid)
+                     ->  Sort
+                           Sort Key: tb.aval
+                           ->  Seq Scan on tb
+                     ->  Sort
+                           Sort Key: tc.aid
+                           ->  Seq Scan on tc
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Semi Join
+   ->  Hash Join
+         Hash Cond: (ta.id = tb.id)
+         ->  Seq Scan on ta
+         ->  Hash
+               ->  Seq Scan on tb
+   ->  Nested Loop
+         ->  Seq Scan on tb tb1
+         ->  Materialize
+               ->  Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = 1)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = 1)
+         ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (ta.val = 1)
+           ->  Nested Loop Left Join
+                 Join Filter: (ta.id = tc.id)
+                 ->  Seq Scan on tc
+                 ->  Materialize
+                       ->  Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on tb
+                                 Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta ta2
+         Filter: (val = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using ta_pkey on ta
+               Index Cond: (id = ta2.id)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta ta1
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Seq Scan on tb
+         ->  Materialize
+               ->  Seq Scan on ta ta2
+                     Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (ta.id = tc.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (tb.id = tc.id)
+               Join Filter: ((tc.aid + tb.aval) > 0)
+               ->  Seq Scan on tb
+               ->  Hash
+                     ->  Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   Join Filter: EXISTS(SubPlan 1)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+   SubPlan 1
+     ->  Index Scan using tc_pkey on tc
+           Index Cond: (id = tb.id)
+           Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: COALESCE(is_active, true)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Merge Join
+   Merge Cond: (tb.id = ta.id)
+   ->  Index Only Scan using tb_pkey on tb
+   ->  Unique
+         ->  Nested Loop
+               ->  Index Scan using ta_pkey on ta
+                     Filter: COALESCE(is_active, true)
+               ->  Materialize
+                     ->  Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: CASE WHEN is_active THEN true ELSE false END
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Merge Join
+   Merge Cond: (tb.id = ta.id)
+   ->  Index Only Scan using tb_pkey on tb
+   ->  Unique
+         ->  Nested Loop
+               ->  Index Scan using ta_pkey on ta
+                     Filter: CASE WHEN is_active THEN true ELSE false END
+               ->  Materialize
+                     ->  Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: is_active
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: (is_active IS NOT NULL)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+               QUERY PLAN               
+----------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Left Join
+           Join Filter: (ta.id = tc.id)
+           ->  Seq Scan on tc
+           ->  Materialize
+                 ->  Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Result
+   One-Time Filter: (InitPlan 1).col1
+   InitPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on tb
+           ->  Index Only Scan using tc_pkey on tc
+                 Index Cond: (id = tb.id)
+   ->  Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (tc.id = tb.id)
+   ->  Hash Join
+         Hash Cond: (tc.id = ta.id)
+         ->  Seq Scan on tc
+         ->  Hash
+               ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+                 QUERY PLAN                 
+--------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   Join Filter: EXISTS(SubPlan 1)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  Seq Scan on ta
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (tb.id = ta.id)
+           ->  Seq Scan on tc
+(10 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = td.id)
+   ->  Nested Loop Semi Join
+         ->  Seq Scan on ta
+         ->  Nested Loop
+               ->  Index Only Scan using tc_pkey on tc
+                     Index Cond: (id = ta.id)
+               ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: td.id
+               ->  Seq Scan on td
+(12 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         Join Filter: EXISTS(SubPlan 1)
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+         SubPlan 1
+           ->  Result
+                 One-Time Filter: (tb.id = ta.id)
+                 ->  Seq Scan on td
+(11 rows)
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+                                                                      QUERY PLAN                                                                       
+-------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+   Merge Cond: (t1.id = t2.id)
+   ->  Index Scan using ta_pkey on ta t1
+   ->  Nested Loop Semi Join
+         Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1)))
+         ->  Index Scan using tb_pkey on tb t2
+         ->  Materialize
+               ->  Seq Scan on td t3
+                     Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 2
+           ->  Seq Scan on te t4
+         SubPlan 4
+           ->  Seq Scan on tc t5
+(13 rows)
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Hash Right Semi Join  (cost=181.20..219.35 rows=1100 width=9)
+   Hash Cond: (td.id = ta.id)
+   ->  Hash Join  (cost=121.70..150.02 rows=1200 width=12)
+         Hash Cond: (td.id = tc.id)
+         ->  Hash Join  (cost=60.85..86.01 rows=1200 width=8)
+               Hash Cond: (td.id = tb.id)
+               ->  Seq Scan on td  (cost=0.00..22.00 rows=1200 width=4)
+               ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+                     ->  Seq Scan on tb  (cost=0.00..32.60 rows=2260 width=4)
+         ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+               ->  Seq Scan on tc  (cost=0.00..32.60 rows=2260 width=4)
+   ->  Hash  (cost=32.00..32.00 rows=2200 width=9)
+         ->  Seq Scan on ta  (cost=0.00..32.00 rows=2200 width=9)
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (t2.tst1_id = t1.id)
+   ->  Nested Loop Semi Join
+         Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3))
+         ->  Seq Scan on tst2 t2
+         ->  Materialize
+               ->  Seq Scan on tst3 t3
+                     Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan 1
+           ->  Seq Scan on tst4 t4
+                 Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+         SubPlan 3
+           ->  Seq Scan on tst5 t5
+                 Filter: (tst3_id = t3.id)
+   ->  Hash
+         ->  Seq Scan on tst1 t1
+               Filter: (id IS NOT NULL)
+(17 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 select
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..d0762c1299e 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
          One-Time Filter: ((InitPlan 1).col1 IS NOT TRUE)
  
  Update on base_tbl
-   InitPlan 1
-     ->  Index Only Scan using base_tbl_pkey on base_tbl t
-           Index Cond: (id = 2)
-   ->  Result
-         One-Time Filter: (InitPlan 1).col1
+   ->  Nested Loop Semi Join
          ->  Index Scan using base_tbl_pkey on base_tbl
                Index Cond: (id = 2)
-(15 rows)
+         ->  Index Scan using base_tbl_pkey on base_tbl t
+               Index Cond: (id = 2)
+(13 rows)
 
 INSERT INTO rw_view1 VALUES (2, 'New row 2');
 SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index a6d276a115b..ca31e47c973 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -507,6 +507,449 @@ where exists (
 rollback;
 
 --
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 
-- 
2.34.1

#16

Alena Rybakina

lena.ribackina@yandex.ru

3 months ago

In reply to: Ilia Evdokimov (#13)

1 attachment(s)

Re: pull-up subquery if JOIN-ON contains refs to upper-query

Hi!

On 03.09.2025 00:07, Ilia Evdokimov wrote:

Hi,

I've reviewed this patch, and I have a suggestion about the approach.

Currently, the patch extends 'convert_EXISTS_sublink_to_join' with
quite complex logic (clause collection, volatile checks, rewriting
join quals, etc.). While it works, the amount of branching and special
cases makes the function harder to follow.

Looking at the logic, it seems that a large part of the complexity
comes from trying to directly adapt 'convert_EXISTS_sublink_to_join'
instead of factoring out a dedicated path. An alternative would be to
introduce a separate function
'convert_EXISTS_sublink_to_lateral_join' - with a similar API to
'convert_ANY_sublink_to_join'. Such a function can focus only on the
EXISTS-to-join case, while keeping the existing function shorter and
easier to reason about.

I even made some first rough sketches of this approach (not a finished
patch, just an outline). Of course, it would still need proper
adaptation, but I think it demonstrates that the overall structure can
be kept simpler.

What do you think about refactoring in this direction?

I think this approach isn’t fully correct. By forming a join between a
subquery and the outer relation, you effectively force the optimizer to
choose a Nested Loop join. Furthermore, it prevents the planner from
exploring all join orders between the subquery’s tables and the outer
relation, so we may miss a more optimal plan.

With your patch, I consistently get the following plan. I even disabled
nested loops to see whether the planner could switch to a Hash Join or
Merge Join, but those aren’t applicable with lateral parameters in this
pattern.

CREATE TABLE ta (id int PRIMARY KEY, val int);
INSERT INTO ta VALUES (1,1), (2,2);

CREATE TABLE tb (id int PRIMARY KEY, aval int);
INSERT INTO tb VALUES (1,1), (2,1);

CREATE TABLE tc (id int PRIMARY KEY, aid int);
INSERT INTO tc VALUES (3,5), (1,5);

CREATE TABLE td (id int PRIMARY KEY, aid int);
INSERT INTO td VALUES (1,6), (2,7), (3,8), (4,9);

CREATE TABLE te (id int PRIMARY KEY, aid int);
INSERT INTO te VALUES (5,6), (6,7), (7,8), (4,9), (1,1);

SET enable_nestloop = OFF;

EXPLAIN ANALYZE
SELECT ta.id
FROM ta
WHERE EXISTS (
SELECT 1
FROM tb
WHERE tb.id = ta.id
AND EXISTS (SELECT 1 FROM tc WHERE tc.id = tb.id)
);

                                                           QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
Nested Loop Semi Join (cost=0.31..37017.50 rows=2260 width=4) (actual
time=0.116..0.142 rows=2.00 loops=1)
   Disabled: true
   Buffers: local hit=9
   -> Seq Scan on ta (cost=0.00..32.60 rows=2260 width=4) (actual
time=0.037..0.039 rows=2.00 loops=1)
         Buffers: local hit=1

   -> Nested Loop Semi Join (cost=0.31..16.36 rows=1 width=0) (actual
time=0.045..0.046 rows=1.00 loops=2)
         Disabled: true
         Buffers: local hit=8
         -> Index Only Scan using tb_pkey on tb (cost=0.15..8.17
rows=1 width=4) (actual time=0.030..0.030 rows=1.00 loops=2)
               Index Cond: (id = ta.id)
               Heap Fetches: 2
               Index Searches: 2
               Buffers: local hit=4
         -> Index Only Scan using tc_pkey on tc (cost=0.15..8.17
rows=1 width=4) (actual time=0.010..0.010 rows=1.00 loops=2)
               Index Cond: (id = ta.id)
               Heap Fetches: 2
               Index Searches: 2
               Buffers: local hit=4
Planning Time: 0.539 ms
Execution Time: 0.252 ms
(20 rows)

Anyway, thank you for the work and attention here - your feedback was
useful!

I’ve also rebased the patch on current master.

Attachments:

v6-0001-Enables-pull-up-of-EXISTS-subqueries-that-contain-IN.patchtext/x-patch; charset=UTF-8; name=v6-0001-Enables-pull-up-of-EXISTS-subqueries-that-contain-IN.patchDownload

From 965d0985029b3d05459e716908755ec12bdb1100 Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybakina@postgrespro.ru>
Date: Fri, 10 Oct 2025 13:49:46 +0300
Subject: [PATCH] Enables pull-up of EXISTS subqueries that contain INNER
 joins, unlocking join reordering and earlier filtering. OUTER joins with
 outer references are safely excluded to preserve null-preserving semantics.

To achieve this, introduce a mutator that performs a single conservative
pass over the subquery jointree and stops transformation if subquery
contains volatile quals, or OUTER joins with outer references, since
hoisting would break null-preserving behavior.

On the other hand, OUTER joins without such references remain intact.
Add IS NOT NULL guards on hoisted outer Vars so that rows with NULL values,
which could not produce a match after the join operation anyway, are
filtered out early.
Replace affected subquery quals with true.

Author: Alena Rybakina
Reviewers: Ranier Vilela <ranier.vf@gmail.com>,
	   Peter Petrov <p.petrov@postgrespro.ru>,
	   Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>
---
 src/backend/optimizer/plan/subselect.c        | 289 ++++++-
 src/test/regress/expected/subselect.out       | 791 ++++++++++++++++++
 src/test/regress/expected/updatable_views.out |  10 +-
 src/test/regress/sql/subselect.sql            | 443 ++++++++++
 4 files changed, 1487 insertions(+), 46 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 14192a13236..f57430a0429 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1440,6 +1440,237 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	return result;
 }
 
+typedef struct HoistJoinQualsContext
+{
+	List *outer_clauses;   /* collect hoisted clauses */
+	Relids observed_nulltest_vars;
+} HoistJoinQualsContext;
+
+static Node *
+preprocess_quals(Node *node)
+{
+	/*
+	 * Run const-folding without planner context.
+	 *
+	 * IMPORTANT: Pass NULL as PlannerInfo here because we’re simplifying
+	 * a *subquery’s* quals before its rtable has been merged with the
+	 * parent. If we passed a non-NULL root, eval_const_expressions()
+	 * could perform root-dependent transforms (e.g., fold NullTest on Var
+	 * using var_is_nonnullable) against the *wrong* rangetable, risking
+	 * out-of-bounds RTE access. See eval_const_expressions()’s contract:
+	 * “root can be passed as NULL …” for exactly this use-case.
+	 */
+	node = eval_const_expressions(NULL, node);
+	node = (Node *) canonicalize_qual((Expr *) node, false);
+
+	node = (Node *) make_ands_implicit((Expr *) node);
+
+	return node;
+}
+
+static NullTest *
+make_nulltest(Var *var, NullTestType type)
+{
+	NullTest *nulltest = makeNode(NullTest);
+	nulltest->arg = (Expr *) var;
+	nulltest->nulltesttype = type;
+	nulltest->argisrow = false;
+	nulltest->location = -1;
+
+	return nulltest;
+}
+
+static bool
+simplicity_check_walker(Node *node, void *ctx)
+{
+	if (node == NULL)
+	{
+		return false;
+	}
+	else if(IsA(node, Var))
+		return true;
+	else if(IsA(node, Query))
+		return query_tree_walker((Query *) node,
+								 simplicity_check_walker,
+								 (void*) ctx,
+								 QTW_EXAMINE_RTES_BEFORE);
+
+	return expression_tree_walker(node, simplicity_check_walker,
+								  (void *) ctx);
+}
+
+static List *
+generate_not_null_exprs(List *list_expr, Relids *observed_vars)
+{
+	ListCell *lc;
+	List *result = NIL;
+
+	foreach(lc, list_expr)
+	{
+		Node *node = (Node *) lfirst(lc);
+
+		if (IsA(node, OpExpr))
+		{
+			Node *larg = get_leftop(node);
+			Node *rarg = get_rightop(node);
+
+			if (IsA(larg, RelabelType))
+				larg = (Node *) ((RelabelType *) larg)->arg;
+
+			if (IsA(rarg, RelabelType))
+				rarg = (Node *) ((RelabelType *) rarg)->arg;
+
+			if(IsA(larg, Var))
+			{
+				Var *var = (Var *) larg;
+				if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+				{
+					NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+					result = lappend(result, nulltest);
+					*observed_vars = bms_add_member(*observed_vars, var->varno);
+					continue;
+				}
+			}
+
+			if(IsA(rarg, Var))
+			{
+				Var *var = (Var *) rarg;
+				if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+				{
+					NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+					result = lappend(result, nulltest);
+					*observed_vars = bms_add_member(*observed_vars, var->varno);
+					continue;
+				}
+			}
+		}
+	}
+
+	return result;
+}
+
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
+{
+	if (jtnode == NULL)
+		return NULL;
+
+	if (IsA(jtnode, RangeTblRef))
+		return jtnode;  /* nothing to change */
+
+	if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr *j = (JoinExpr *) jtnode;
+		JoinExpr *newj = makeNode(JoinExpr);
+		ListCell *lc;
+		List *join_clauses = NIL;
+		Node *qual;
+		memcpy(newj, j, sizeof(JoinExpr));
+
+		/* Recurse into join inputs */
+		newj->larg = (Node *) hoist_parent_quals_jointree_mutator(j->larg, context);
+		newj->rarg = (Node *) hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+		if(contain_volatile_functions(newj->quals) ||
+								newj->larg == NULL ||
+								newj->rarg == NULL)
+			return NULL;
+
+		qual = newj->quals;
+		qual = preprocess_quals(qual);
+
+		foreach(lc, (List *) qual)
+		{
+			Node *node = (Node *) lfirst(lc);
+
+			if (IsA(node, OpExpr))
+			{
+				if(simplicity_check_walker(get_leftop(node), NULL) &&
+						simplicity_check_walker(get_rightop(node), NULL))
+				{
+					join_clauses = lappend(join_clauses, node);
+					continue;
+				}
+			}
+			context->outer_clauses = lappend(context->outer_clauses, node);
+		}
+
+		/* Only touch INNER JOINs */
+		if ((j->jointype != JOIN_LEFT &&
+			 j->jointype != JOIN_RIGHT &&
+			 j->jointype != JOIN_FULL))  /* subquery vars */
+		{
+			List *null_tests;
+
+			if (join_clauses == NIL)  /* subquery vars */
+			{
+				newj->quals = (Node *) makeBoolConst(true, false);
+			}
+			else if(join_clauses != NIL && contain_vars_of_level((Node *) join_clauses, 1))
+			{
+				null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
+				context->outer_clauses = list_concat(context->outer_clauses, null_tests);
+				context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
+				newj->quals = (Node *) makeBoolConst(true, false);
+			}
+			else
+			{
+				newj->quals = (Node *) make_ands_explicit(join_clauses);
+			}
+		}
+		else
+		{
+			if (contain_vars_of_level(j->quals, 1))
+				return NULL;
+		}
+
+		return (Node *) newj;
+	}
+
+	if (IsA(jtnode, FromExpr))
+	{
+		FromExpr *f = (FromExpr *) jtnode;
+		FromExpr *newf = makeNode(FromExpr);
+		ListCell *lc;
+		List *fromlist = NIL;
+
+		/* Recurse into fromlist */
+		memcpy(newf, f, sizeof(FromExpr));
+
+		/*
+		 * Process children, if any of their jointree contains Vars of the
+		 * parent query or quals of their JoinExpr contains volatile functions
+		 * then exit
+		 */
+		foreach(lc, newf->fromlist)
+		{
+			Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+			if (fnode == NULL)
+				return NULL;
+			fromlist = lappend(fromlist, fnode);
+		}
+
+		newf->fromlist = fromlist;
+
+		if(contain_volatile_functions(newf->quals))
+			return NULL;
+
+		if(newf->quals)
+		{
+			Node *qual = newf->quals;
+			/* Quals (WHERE clause) may still contain sublinks etc */
+			qual = preprocess_quals(qual);
+			context->outer_clauses = list_concat(context->outer_clauses, (List *) qual);
+			newf->quals = NULL;
+		}
+
+		return (Node *) newf;
+	}
+
+	return jtnode;  /* quiet compiler */
+}
+
 /*
  * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
  *
@@ -1454,12 +1685,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	JoinExpr   *result;
 	Query	   *parse = root->parse;
 	Query	   *subselect = (Query *) sublink->subselect;
-	Node	   *whereClause;
 	PlannerInfo subroot;
 	int			rtoffset;
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	List 	   *newWhere = NIL;
+	HoistJoinQualsContext hjq_context = {NIL, NULL};
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1489,34 +1721,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
-	/*
-	 * Separate out the WHERE clause.  (We could theoretically also remove
-	 * top-level plain JOIN/ON clauses, but it's probably not worth the
-	 * trouble.)
-	 */
-	whereClause = subselect->jointree->quals;
-	subselect->jointree->quals = NULL;
-
-	/*
-	 * The rest of the sub-select must not refer to any Vars of the parent
-	 * query.  (Vars of higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
-
-	/*
-	 * On the other hand, the WHERE clause must contain some Vars of the
-	 * parent query, else it's not gonna be a join.
-	 */
-	if (!contain_vars_of_level(whereClause, 1))
-		return NULL;
-
-	/*
-	 * We don't risk optimizing if the WHERE clause is volatile, either.
-	 */
-	if (contain_volatile_functions(whereClause))
-		return NULL;
-
 	/*
 	 * Scan the rangetable for relation RTEs and retrieve the necessary
 	 * catalog information for each relation.  Using this information, clear
@@ -1537,13 +1741,17 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	subroot.type = T_PlannerInfo;
 	subroot.glob = root->glob;
 	subroot.parse = subselect;
-	subselect->jointree->quals = whereClause;
 	subselect = preprocess_relation_rtes(&subroot);
 
-	/*
-	 * Now separate out the WHERE clause again.
-	 */
-	whereClause = subselect->jointree->quals;
+	subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
+
+	if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
+		return NULL;
+
+	newWhere = hjq_context.outer_clauses;
+
+	bms_free(hjq_context.observed_nulltest_vars);
+
 	subselect->jointree->quals = NULL;
 
 	/*
@@ -1568,7 +1776,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 */
 	rtoffset = list_length(parse->rtable);
 	OffsetVarNodes((Node *) subselect, rtoffset, 0);
-	OffsetVarNodes(whereClause, rtoffset, 0);
+	OffsetVarNodes((Node *) newWhere, rtoffset, 0);
 
 	/*
 	 * Upper-level vars in subquery will now be one level closer to their
@@ -1576,7 +1784,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * becomes level zero.
 	 */
 	IncrementVarSublevelsUp((Node *) subselect, -1, 1);
-	IncrementVarSublevelsUp(whereClause, -1, 1);
+	IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
 
 	/*
 	 * Now that the WHERE clause is adjusted to match the parent query
@@ -1584,7 +1792,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
 	 * not.
 	 */
-	clause_varnos = pull_varnos(root, whereClause);
+	clause_varnos = pull_varnos(root, (Node *) newWhere);
 	upper_varnos = NULL;
 	varno = -1;
 	while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
@@ -1593,7 +1801,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 			upper_varnos = bms_add_member(upper_varnos, varno);
 	}
 	bms_free(clause_varnos);
-	Assert(!bms_is_empty(upper_varnos));
 
 	/*
 	 * Now that we've got the set of upper-level varnos, we can make the last
@@ -1607,7 +1814,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 * adds subquery's RTEPermissionInfos into the upper query.
 	 */
 	CombineRangeTables(&parse->rtable, &parse->rteperminfos,
-					   subselect->rtable, subselect->rteperminfos);
+					subselect->rtable, subselect->rteperminfos);
 
 	/*
 	 * And finally, build the JoinExpr node.
@@ -1616,16 +1823,18 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
 	result->isNatural = false;
 	result->larg = NULL;		/* caller must fill this in */
+	
 	/* flatten out the FromExpr node if it's useless */
 	if (list_length(subselect->jointree->fromlist) == 1)
 		result->rarg = (Node *) linitial(subselect->jointree->fromlist);
 	else
 		result->rarg = (Node *) subselect->jointree;
+	
 	result->usingClause = NIL;
 	result->join_using_alias = NULL;
-	result->quals = whereClause;
 	result->alias = NULL;
 	result->rtindex = 0;		/* we don't need an RTE for it */
+	result->quals = (Node *) make_ands_explicit(newWhere);
 
 	return result;
 }
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index cf6b32d1173..e49866bf1c9 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1038,6 +1038,797 @@ where exists (
   where road.name = ss.f1 );
 rollback;
 --
+-- Test case for EXISTS sublink where we can consider some independent expression
+-- with outer reference
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Hash Join
+   Hash Cond: (ta1.id = tb.aval)
+   ->  Seq Scan on ta ta1
+   ->  Hash
+         ->  Unique
+               ->  Merge Join
+                     Merge Cond: (tb.aval = tc.aid)
+                     ->  Sort
+                           Sort Key: tb.aval
+                           ->  Seq Scan on tb
+                     ->  Sort
+                           Sort Key: tc.aid
+                           ->  Seq Scan on tc
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+             QUERY PLAN             
+------------------------------------
+ Nested Loop Semi Join
+   ->  Hash Join
+         Hash Cond: (ta.id = tb.id)
+         ->  Seq Scan on ta
+         ->  Hash
+               ->  Seq Scan on tb
+   ->  Nested Loop
+         ->  Seq Scan on tb tb1
+         ->  Materialize
+               ->  Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = 1)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = 1)
+         ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan exists_1)
+   SubPlan exists_1
+     ->  Result
+           One-Time Filter: (ta.val = 1)
+           ->  Nested Loop Left Join
+                 Join Filter: (ta.id = tc.id)
+                 ->  Seq Scan on tc
+                 ->  Materialize
+                       ->  Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  HashAggregate
+               Group Key: tb.id
+               ->  Nested Loop
+                     ->  Seq Scan on tc
+                     ->  Materialize
+                           ->  Seq Scan on tb
+                                 Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta ta2
+         Filter: (val = 1)
+   ->  Nested Loop
+         ->  Index Only Scan using ta_pkey on ta
+               Index Cond: (id = ta2.id)
+         ->  Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   ->  Index Only Scan using ta_pkey on ta ta1
+         Index Cond: (id = 1)
+   ->  Nested Loop
+         ->  Seq Scan on tb
+         ->  Materialize
+               ->  Seq Scan on ta ta2
+                     Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (ta.id = tc.id)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (tb.id = tc.id)
+               Join Filter: ((tc.aid + tb.aval) > 0)
+               ->  Seq Scan on tb
+               ->  Hash
+                     ->  Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+               QUERY PLAN                
+-----------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = tb.id)
+   Join Filter: EXISTS(SubPlan exists_1)
+   ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+   SubPlan exists_1
+     ->  Index Scan using tc_pkey on tc
+           Index Cond: (id = tb.id)
+           Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: COALESCE(is_active, true)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Merge Join
+   Merge Cond: (tb.id = ta.id)
+   ->  Index Only Scan using tb_pkey on tb
+   ->  Unique
+         ->  Nested Loop
+               ->  Index Scan using ta_pkey on ta
+                     Filter: COALESCE(is_active, true)
+               ->  Materialize
+                     ->  Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: CASE WHEN is_active THEN true ELSE false END
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Merge Join
+   Merge Cond: (tb.id = ta.id)
+   ->  Index Only Scan using tb_pkey on tb
+   ->  Unique
+         ->  Nested Loop
+               ->  Index Scan using ta_pkey on ta
+                     Filter: CASE WHEN is_active THEN true ELSE false END
+               ->  Materialize
+                     ->  Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: is_active
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+         Filter: (is_active IS NOT NULL)
+   ->  Nested Loop
+         ->  Index Only Scan using tb_pkey on tb
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tc
+(7 rows)
+
+-- Pull-up is disabled because it is applicable only to INNER JOINs
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+               QUERY PLAN               
+----------------------------------------
+ Seq Scan on ta
+   Filter: EXISTS(SubPlan exists_1)
+   SubPlan exists_1
+     ->  Nested Loop Left Join
+           Join Filter: (ta.id = tc.id)
+           ->  Seq Scan on tc
+           ->  Materialize
+                 ->  Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Result
+   One-Time Filter: (InitPlan exists_1).col1
+   InitPlan exists_1
+     ->  Nested Loop
+           ->  Seq Scan on tb
+           ->  Index Only Scan using tc_pkey on tc
+                 Index Cond: (id = tb.id)
+   ->  Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (tc.id = ta.id)
+   ->  Hash Join
+         Hash Cond: (tb.id = tc.id)
+         ->  Seq Scan on tb
+         ->  Hash
+               ->  Seq Scan on tc
+   ->  Hash
+         ->  Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+             QUERY PLAN             
+------------------------------------
+ Hash Join
+   Hash Cond: (tc.id = tb.id)
+   ->  Hash Join
+         Hash Cond: (tc.id = ta.id)
+         ->  Seq Scan on tc
+         ->  Hash
+               ->  Seq Scan on ta
+   ->  Hash
+         ->  Seq Scan on tb
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+                 QUERY PLAN                 
+--------------------------------------------
+ Hash Join
+   Hash Cond: (tb.id = ta.id)
+   Join Filter: EXISTS(SubPlan exists_1)
+   ->  Seq Scan on tb
+   ->  Hash
+         ->  Seq Scan on ta
+   SubPlan exists_1
+     ->  Result
+           One-Time Filter: (tb.id = ta.id)
+           ->  Seq Scan on tc
+(10 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Hash Join
+   Hash Cond: (ta.id = td.id)
+   ->  Nested Loop Semi Join
+         ->  Seq Scan on ta
+         ->  Nested Loop
+               ->  Index Only Scan using tc_pkey on tc
+                     Index Cond: (id = ta.id)
+               ->  Seq Scan on tb
+   ->  Hash
+         ->  HashAggregate
+               Group Key: td.id
+               ->  Seq Scan on td
+(12 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on ta
+   ->  Nested Loop
+         Join Filter: EXISTS(SubPlan exists_1)
+         ->  Index Only Scan using tc_pkey on tc
+               Index Cond: (id = ta.id)
+         ->  Seq Scan on tb
+         SubPlan exists_1
+           ->  Result
+                 One-Time Filter: (tb.id = ta.id)
+                 ->  Seq Scan on td
+(11 rows)
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+                                                                                 QUERY PLAN                                                                                 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+   Merge Cond: (t1.id = t2.id)
+   ->  Index Scan using ta_pkey on ta t1
+   ->  Nested Loop Semi Join
+         Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan exists_2).col1) AND (t2.aval = (hashed SubPlan exists_2).col2))) = (ANY (t3.id = (hashed SubPlan exists_4).col1)))
+         ->  Index Scan using tb_pkey on tb t2
+         ->  Materialize
+               ->  Seq Scan on td t3
+                     Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan exists_2
+           ->  Seq Scan on te t4
+         SubPlan exists_4
+           ->  Seq Scan on tc t5
+(13 rows)
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Hash Right Semi Join  (cost=181.20..219.35 rows=1100 width=9)
+   Hash Cond: (td.id = ta.id)
+   ->  Hash Join  (cost=121.70..150.02 rows=1200 width=12)
+         Hash Cond: (td.id = tc.id)
+         ->  Hash Join  (cost=60.85..86.01 rows=1200 width=8)
+               Hash Cond: (td.id = tb.id)
+               ->  Seq Scan on td  (cost=0.00..22.00 rows=1200 width=4)
+               ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+                     ->  Seq Scan on tb  (cost=0.00..32.60 rows=2260 width=4)
+         ->  Hash  (cost=32.60..32.60 rows=2260 width=4)
+               ->  Seq Scan on tc  (cost=0.00..32.60 rows=2260 width=4)
+   ->  Hash  (cost=32.00..32.00 rows=2200 width=9)
+         ->  Seq Scan on ta  (cost=0.00..32.00 rows=2200 width=9)
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two EXISTS in an OpExpr: in the first one, t3.id is a reference to the parent query
+-- and t2.type_id is a reference to the grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+   Hash Cond: (t2.tst1_id = t1.id)
+   ->  Nested Loop Semi Join
+         Join Filter: (EXISTS(SubPlan exists_1) = EXISTS(SubPlan exists_3))
+         ->  Seq Scan on tst2 t2
+         ->  Materialize
+               ->  Seq Scan on tst3 t3
+                     Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+         SubPlan exists_1
+           ->  Seq Scan on tst4 t4
+                 Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+         SubPlan exists_3
+           ->  Seq Scan on tst5 t5
+                 Filter: (tst3_id = t3.id)
+   ->  Hash
+         ->  Seq Scan on tst1 t1
+               Filter: (id IS NOT NULL)
+(17 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 select
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 03df7e75b7b..5ff8db53cf0 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
          One-Time Filter: ((InitPlan exists_1).col1 IS NOT TRUE)
  
  Update on base_tbl
-   InitPlan exists_1
-     ->  Index Only Scan using base_tbl_pkey on base_tbl t
-           Index Cond: (id = 2)
-   ->  Result
-         One-Time Filter: (InitPlan exists_1).col1
+   ->  Nested Loop Semi Join
          ->  Index Scan using base_tbl_pkey on base_tbl
                Index Cond: (id = 2)
-(15 rows)
+         ->  Index Scan using base_tbl_pkey on base_tbl t
+               Index Cond: (id = 2)
+(13 rows)
 
 INSERT INTO rw_view1 VALUES (2, 'New row 2');
 SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 36a8a0aa1d5..40c9c3d884b 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -516,6 +516,449 @@ where exists (
 rollback;
 
 --
+-- Test case for EXISTS sublink where we can consider some independent expression
+-- with outer reference
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON 1 = 1
+  WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tb.aval = tc.aid
+           AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+  SELECT 1
+  FROM tb tb1
+  JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tc.id
+           AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+  WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+           AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.id = ta1.id
+           AND ta1.val = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta1.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tb ON ta.val = ta1.id
+           AND ta1.id = 1
+  WHERE EXISTS (
+    SELECT 1
+    FROM ta ta2
+    WHERE ta2.id = ta.id
+  )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + tb.aval > 0
+    )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  WHERE ta.id = tb.id
+    AND EXISTS (
+      SELECT 1
+      FROM tc
+      WHERE tc.id = tb.id
+        AND tc.aid + ta.val > 0
+    )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+  SELECT 1
+  FROM ta
+  JOIN tc ON ta.id = tb.id
+         AND CASE
+               WHEN ta.is_active THEN true
+               ELSE false
+             END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON ta.id = tb.id
+         AND ta.is_active IS NOT NULL
+);
+
+
+-- Pull-up is disabled because it is applicable only to INNER JOINs
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = tb.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id AND
+           EXISTS
+        (SELECT 1
+           FROM tc
+          WHERE tc.id = ta.id)
+               );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+     WHERE tb.id = ta.id
+       AND EXISTS
+              (SELECT 1
+                 FROM tc
+               WHERE tb.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE td.id = ta.id)
+                );
+
+explain (COSTS OFF)
+ SELECT ta.id
+   FROM ta
+  WHERE EXISTS (
+    SELECT 1
+      FROM tb
+        join tc on tc.id = ta.id
+          AND EXISTS (
+                SELECT 1
+                  FROM td
+                WHERE tb.id = ta.id)
+                );
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tb t2 ON t2.id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM te t4
+      WHERE t4.tc_id = t3.tc_id
+        AND t4.val = t2.aval
+    ) = EXISTS (
+      SELECT 1
+      FROM tc t5
+      WHERE t5.id = t3.id
+    )
+  )
+);
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+  SELECT 1
+  FROM tb
+  JOIN tc ON tc.id = tb.id
+         AND tb.id = ta.id
+  JOIN td ON td.id = tc.id
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+   FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+   FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+   FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two EXISTS in an OpExpr: in the first one, t3.id is a reference to the parent query
+-- and t2.type_id is a reference to the grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+  SELECT 1
+  FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+  JOIN tst2 t2 ON t2.tst1_id = t1.id
+  WHERE EXISTS (
+    SELECT 1
+    FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+    JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+    WHERE EXISTS (
+      SELECT 1
+      FROM tst4 t4
+      WHERE t4.tst3_id = t3.id
+        AND t4.type_id = t2.type_id
+    ) = EXISTS (
+      SELECT 1
+      FROM tst5 t5
+      WHERE t5.tst3_id = t3.id
+    )
+  )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
 -- Test case for sublinks pushed down into subselects via join alias expansion
 --
 
-- 
2.34.1