Correlated IN/Any Subquery Transformation

Started by Li, Zhengalmost 6 years ago2 messages
#1Li, Zheng
zhelli@amazon.com
1 attachment(s)

Hi PGHackers:

Currently, correlated IN/Any subquery always gets planned as a SubPlan which leads to poor performance:
postgres=# explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
QUERY PLAN
------------------------------------
Aggregate
-> Seq Scan on s
Filter: (SubPlan 1)
SubPlan 1
-> Seq Scan on l
Filter: (u <> s.u)

postgres=# select count() from s where s.n in (select l.n from l where l.u != s.u);
Time: 3419.466 ms (00:03.419)
However, you can rewrite the query using exists which will be executed using join. In this example the join plan is more than 3 orders of magnitudes faster than the SubPlan:
postgres=# explain (costs off) select count(*) from s where exists (select 1 from l where l.n = s.n and l.u != s.u);
QUERY PLAN
---------------------------------------
Aggregate
-> Merge Semi Join
Merge Cond: (s.n = l.n)
Join Filter: (l.u <> s.u)
-> Index Scan using s_n on s
-> Index Scan using l_n on l

postgres=# select count() from s where exists (select 1 from l where l.n = s.n and l.u != s.u);
Time: 1.188 ms

Table s has 10 rows, table l has 1, 000, 000 rows.

This patch enables correlated IN/Any subquery to be transformed to join, the transformation is allowed only when the correlated Var is in the where clause of the subquery. It covers the most common correlated cases and follows the same criteria that is followed by the correlated Exists transformation code.

Here is the new query plan for the same correlated IN query:
postgres=# explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
QUERY PLAN

Aggregate
-> Merge Semi Join
Merge Cond: (s.n = l.n)
Join Filter: (l.u <> s.u)
-> Index Scan using s_n on s
-> Index Scan using l_n on l

postgres=# select count(*) from s where s.n in (select l.n from l where l.u != s.u);
Time: 1.693 ms
________________________________
Also the patch introduces a new GUC enable_correlated_any_transform (on by default) to guard the optimization. Test cases are included in the patch. Comments are welcome!

-----------
Zheng Li
AWS, Amazon Aurora PostgreSQL

Attachments:

correlated_any_transformation.patchapplication/octet-stream; name=correlated_any_transformation.patchDownload
commit c539a2fd1a83ac86a0fb5474965bc55be24414d8
Author: Zheng Li <zhelli@amazon.com>
Date:   Fri Mar 20 15:33:14 2020 +0000

    Allow correlated IN/NOT IN subquery to be converted to join. Add a new GUC for the optimization.

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3650e8329d..fbbb0ad464 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -39,6 +39,8 @@
 #include "utils/syscache.h"
 
 
+bool enable_correlated_any_transform;
+
 typedef struct convert_testexpr_context
 {
 	PlannerInfo *root;
@@ -1222,16 +1224,31 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	RangeTblRef *rtr;
 	List	   *subquery_vars;
 	Node	   *quals;
+	Node       *whereClause;
 	ParseState *pstate;
 
 	Assert(sublink->subLinkType == ANY_SUBLINK);
 
+	whereClause = subselect->jointree->quals;
+	/*
+	 * Separate out the WHERE clause if enable_correlated_any_transform is
+	 * enabled in order to pass the next check.
+	 */
+	if (enable_correlated_any_transform)
+		subselect->jointree->quals = NULL;
+
 	/*
 	 * The sub-select must not refer to any Vars of the parent query. (Vars of
 	 * higher levels should be okay, though.)
 	 */
 	if (contain_vars_of_level((Node *) subselect, 1))
+	{
+		/* Add the whereClause back */
+		subselect->jointree->quals = whereClause;
 		return NULL;
+	}
+
+	subselect->jointree->quals = whereClause;
 
 	/*
 	 * The test expression must contain some Vars of the parent query, else
@@ -1271,6 +1288,16 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 										   false,
 										   false);
 	rte = nsitem->p_rte;
+
+	/*
+	 * Mark the RTE (of type subselect) as lateral if the whereClause contains
+	 * Var from the immediate containing query block, i.e. we will do a lateral
+	 * join since the whereClause needs access to columns that appear before the
+	 * subselect.
+	 */
+	if (contain_vars_of_level((Node *) whereClause, 1))
+		rte->lateral = true;
+
 	parse->rtable = lappend(parse->rtable, rte);
 	rtindex = list_length(parse->rtable);
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af876d1f01..13758a74df 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1059,6 +1059,16 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_correlated_any_transform", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner to transform correlated ANY Sublink (IN/NOT IN subquery) to JOIN when possible."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_correlated_any_transform,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"enable_gathermerge", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of gather merge plans."),
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 735ba09650..fe1e418bf6 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -60,6 +60,7 @@ extern PGDLLIMPORT bool enable_nestloop;
 extern PGDLLIMPORT bool enable_material;
 extern PGDLLIMPORT bool enable_mergejoin;
 extern PGDLLIMPORT bool enable_hashjoin;
+extern PGDLLIMPORT bool enable_correlated_any_transform;
 extern PGDLLIMPORT bool enable_gathermerge;
 extern PGDLLIMPORT bool enable_partitionwise_join;
 extern PGDLLIMPORT bool enable_partitionwise_aggregate;
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 5283995df8..8d3a6a2e7f 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -182,6 +182,7 @@ extern void pull_varattnos(Node *node, Index varno, Bitmapset **varattnos);
 extern List *pull_vars_of_level(Node *node, int levelsup);
 extern bool contain_var_clause(Node *node);
 extern bool contain_vars_of_level(Node *node, int levelsup);
+extern bool contain_vars_of_upper_levels(Node *node, int level);
 extern int	locate_var_of_level(Node *node, int levelsup);
 extern List *pull_var_clause(Node *node, int flags);
 extern Node *flatten_join_alias_vars(Query *query, Node *node);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b007..494f41bb74 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5662,8 +5662,8 @@ lateral (select * from int8_tbl t1,
                                      where q2 = (select greatest(t1.q1,t2.q2))
                                        and (select v.id=0)) offset 0) ss2) ss
          where t1.q1 = ss.q2) ss0;
-                           QUERY PLAN                            
------------------------------------------------------------------
+                             QUERY PLAN                              
+---------------------------------------------------------------------
  Nested Loop
    Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
    ->  Seq Scan on public.int8_tbl t1
@@ -5675,23 +5675,24 @@ lateral (select * from int8_tbl t1,
          ->  Subquery Scan on ss2
                Output: ss2.q1, ss2.q2
                Filter: (t1.q1 = ss2.q2)
-               ->  Seq Scan on public.int8_tbl t2
+               ->  Result
                      Output: t2.q1, t2.q2
-                     Filter: (SubPlan 3)
-                     SubPlan 3
+                     One-Time Filter: $3
+                     InitPlan 2 (returns $3)
                        ->  Result
-                             Output: t3.q2
-                             One-Time Filter: $4
-                             InitPlan 1 (returns $2)
-                               ->  Result
-                                     Output: GREATEST($0, t2.q2)
-                             InitPlan 2 (returns $4)
-                               ->  Result
-                                     Output: ($3 = 0)
-                             ->  Seq Scan on public.int8_tbl t3
-                                   Output: t3.q1, t3.q2
-                                   Filter: (t3.q2 = $2)
-(27 rows)
+                             Output: ($2 = 0)
+                     ->  Nested Loop Semi Join
+                           Output: t2.q1, t2.q2
+                           Join Filter: (t2.q1 = t3.q2)
+                           ->  Seq Scan on public.int8_tbl t2
+                                 Output: t2.q1, t2.q2
+                                 Filter: ((SubPlan 1) = t2.q1)
+                                 SubPlan 1
+                                   ->  Result
+                                         Output: GREATEST($0, t2.q2)
+                           ->  Seq Scan on public.int8_tbl t3
+                                 Output: t3.q1, t3.q2
+(28 rows)
 
 select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 4c6cd5f146..66e4bad271 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1614,3 +1614,805 @@ select * from x for update;
    Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
 (2 rows)
 
+CREATE TABLE s (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into s (u, n, nn, p)
+    select
+    generate_series(1,3) as u,
+    generate_series(1,3) as n,
+    generate_series(1,3) as nn,
+    'foo' as p;
+insert into s values(1000002, 1000002, 1000002, 'foofoo');
+UPDATE s set n = NULL WHERE n = 3;
+analyze s;
+CREATE TABLE l (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into l (u, n, nn, p)
+    select
+    generate_series(1,10000 ) as u,
+    generate_series(1,10000 ) as n,
+    generate_series(1,10000 ) as nn,
+    'bar' as p;
+UPDATE l set n = NULL WHERE n = 7;
+CREATE TABLE s1 (u INTEGER NOT NULL, n INTEGER NULL, n1 INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into s1 (u, n, n1, nn, p)
+    select
+    generate_series(1,3) as u,
+    generate_series(1,3) as n,
+    generate_series(1,3) as n1,
+    generate_series(1,3) as nn,
+    'foo' as p;
+insert into s1 values(1000003, 1000003, 1000003, 1000003, 'foofoo');
+insert into s1 values(1003, 1003, 1003, 1003, 'foofoo');
+UPDATE s1 set n = NULL WHERE n = 3;
+UPDATE s1 set n1 = NULL WHERE n = 2;
+UPDATE s1 set n1 = NULL WHERE n1 = 3;
+analyze s1;
+CREATE UNIQUE INDEX l_u ON l (u);
+CREATE INDEX l_n ON l (n);
+CREATE INDEX l_nn ON l (nn);
+analyze l;
+CREATE TABLE empty (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+analyze empty;
+-- Recursive CTE
+CREATE TABLE employees (
+  id serial,
+  name varchar(255),
+  manager_id int
+);
+INSERT INTO employees VALUES (1, 'Mark', null);
+INSERT INTO employees VALUES (2, 'John', 1);
+INSERT INTO employees VALUES (3, 'Dan', 2);
+INSERT INTO employees VALUES (4, 'Clark', 1);
+INSERT INTO employees VALUES (5, 'Linda', 2);
+INSERT INTO employees VALUES (6, 'Willy', 2);
+INSERT INTO employees VALUES (7, 'Barack', 2);
+INSERT INTO employees VALUES (8, 'Elen', 2);
+INSERT INTO employees VALUES (9, 'Kate', 3);
+INSERT INTO employees VALUES (10, 'Terry', 4);
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree;
+ id |  name  | manager_id 
+----+--------+------------
+  2 | John   |          1
+  3 | Dan    |          2
+  5 | Linda  |          2
+  6 | Willy  |          2
+  7 | Barack |          2
+  8 | Elen   |          2
+  9 | Kate   |          3
+(7 rows)
+
+--test corrrelated IN subquery
+explain (costs off) select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+              QUERY PLAN               
+---------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Seq Scan on s
+         ->  Index Scan using l_n on l
+               Index Cond: (n = s.n)
+               Filter: (s.u = u)
+(6 rows)
+
+select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+ count 
+-------
+     2
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Index Scan using l_n on l
+                 Index Cond: (n = s.n)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+ count 
+-------
+     2
+(1 row)
+
+reset enable_correlated_any_transform;
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+              QUERY PLAN               
+---------------------------------------
+ Aggregate
+   ->  Nested Loop Semi Join
+         ->  Seq Scan on s
+         ->  Index Scan using l_n on l
+               Index Cond: (n = s.n)
+               Filter: (u <> s.u)
+(6 rows)
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+ count 
+-------
+     0
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+ count 
+-------
+     0
+(1 row)
+
+reset enable_correlated_any_transform;
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+              QUERY PLAN               
+---------------------------------------
+ Aggregate
+   ->  Nested Loop Semi Join
+         ->  Seq Scan on s
+         ->  Index Scan using l_n on l
+               Index Cond: (n = s.n)
+               Filter: (u > s.u)
+(6 rows)
+
+select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+ count 
+-------
+     0
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Index Scan using l_u on l
+                 Index Cond: (u > s.u)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+ count 
+-------
+     0
+(1 row)
+
+reset enable_correlated_any_transform;
+--test corrrelated NOT IN subquery
+explain (costs off) select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Index Scan using l_n on l
+                 Index Cond: (n = s.n)
+(6 rows)
+
+select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+ count 
+-------
+     2
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Index Scan using l_n on l
+                 Index Cond: (n = s.n)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+ count 
+-------
+     2
+(1 row)
+
+reset enable_correlated_any_transform;
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+ count 
+-------
+     0
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+ count 
+-------
+     0
+(1 row)
+
+reset enable_correlated_any_transform;
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Index Scan using l_u on l
+                 Index Cond: (u > s.u)
+(6 rows)
+
+select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+ count 
+-------
+     1
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Index Scan using l_u on l
+                 Index Cond: (u > s.u)
+(6 rows)
+
+--result should match previous result
+select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+ count 
+-------
+     1
+(1 row)
+
+reset enable_correlated_any_transform;
+--correlated empty subquery
+explain (costs off) select count(*) from l where n not in (select n from empty where u != l.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on l
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Seq Scan on empty
+                 Filter: (u <> l.u)
+(6 rows)
+
+select count(*) from l where n not in (select n from empty where u != l.u);
+ count 
+-------
+ 10000
+(1 row)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select count(*) from l where n not in (select n from empty where u != l.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on l
+         Filter: (NOT (SubPlan 1))
+         SubPlan 1
+           ->  Seq Scan on empty
+                 Filter: (u <> l.u)
+(6 rows)
+
+select count(*) from l where n not in (select n from empty where u != l.u);
+ count 
+-------
+ 10000
+(1 row)
+
+reset enable_correlated_any_transform;
+--nested correlated in, correlated var is two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on s
+   ->  Subquery Scan on "ANY_subquery"
+         Filter: (s.n = "ANY_subquery".n)
+         ->  Nested Loop Semi Join
+               ->  Seq Scan on s1
+               ->  Index Scan using l_n on l
+                     Index Cond: (n = s1.n)
+                     Filter: (u <> s.u)
+(9 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on s
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Semi Join
+           ->  Seq Scan on s1
+           ->  Index Scan using l_n on l
+                 Index Cond: (n = s1.n)
+                 Filter: (u <> s.u)
+(8 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+reset enable_correlated_any_transform;
+--nested correlated in, correlated var is one level deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on s
+   ->  Subquery Scan on "ANY_subquery"
+         Filter: (s.n = "ANY_subquery".n)
+         ->  Nested Loop Semi Join
+               ->  Seq Scan on s1
+                     Filter: (u <> s.u)
+               ->  Index Only Scan using l_n on l
+                     Index Cond: (n = s1.n)
+(9 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+                  QUERY PLAN                  
+----------------------------------------------
+ Seq Scan on s
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Semi Join
+           ->  Seq Scan on s1
+                 Filter: (u <> s.u)
+           ->  Index Only Scan using l_n on l
+                 Index Cond: (n = s1.n)
+(8 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+reset enable_correlated_any_transform;
+--nested correlated in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on s
+   ->  Subquery Scan on "ANY_subquery"
+         Filter: (s.n = "ANY_subquery".n)
+         ->  Nested Loop Semi Join
+               ->  Seq Scan on s1
+                     Filter: (u <> s.u)
+               ->  Index Scan using l_n on l
+                     Index Cond: (n = s1.n)
+                     Filter: (u <> s.u)
+(10 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+               QUERY PLAN                
+-----------------------------------------
+ Seq Scan on s
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop Semi Join
+           ->  Seq Scan on s1
+                 Filter: (u <> s.u)
+           ->  Index Scan using l_n on l
+                 Index Cond: (n = s1.n)
+                 Filter: (u <> s.u)
+(9 rows)
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+reset enable_correlated_any_transform;
+--nested correlated not in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on s
+   Filter: (NOT (SubPlan 2))
+   SubPlan 2
+     ->  Seq Scan on s1
+           Filter: ((NOT (hashed SubPlan 1)) AND (u <> s.u))
+           SubPlan 1
+             ->  Seq Scan on l
+                   Filter: (u <> s.u)
+(8 rows)
+
+select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+    u    |    n    |   nn    |   p    
+---------+---------+---------+--------
+       1 |       1 |       1 | foo
+       2 |       2 |       2 | foo
+ 1000002 | 1000002 | 1000002 | foofoo
+       3 |         |       3 | foo
+(4 rows)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on s
+   Filter: (NOT (SubPlan 2))
+   SubPlan 2
+     ->  Seq Scan on s1
+           Filter: ((NOT (hashed SubPlan 1)) AND (u <> s.u))
+           SubPlan 1
+             ->  Seq Scan on l
+                   Filter: (u <> s.u)
+(8 rows)
+
+select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+    u    |    n    |   nn    |   p    
+---------+---------+---------+--------
+       1 |       1 |       1 | foo
+       2 |       2 |       2 | foo
+ 1000002 | 1000002 | 1000002 | foofoo
+       3 |         |       3 | foo
+(4 rows)
+
+reset enable_correlated_any_transform;
+--nested correlated in and not in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (s.n = s1.n)
+   Join Filter: ((s1.u <> s.u) AND (NOT (SubPlan 1)))
+   ->  Seq Scan on s
+   ->  Hash
+         ->  Seq Scan on s1
+   SubPlan 1
+     ->  Seq Scan on l
+           Filter: (u <> s.u)
+(9 rows)
+
+select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+set enable_correlated_any_transform = off;
+explain (costs off) select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on s
+   Filter: (SubPlan 2)
+   SubPlan 2
+     ->  Seq Scan on s1
+           Filter: ((NOT (hashed SubPlan 1)) AND (u <> s.u))
+           SubPlan 1
+             ->  Seq Scan on l
+                   Filter: (u <> s.u)
+(8 rows)
+
+select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+ u | n | nn | p 
+---+---+----+---
+(0 rows)
+
+reset enable_correlated_any_transform;
+--recursive cte & correlated not in
+EXPLAIN (COSTS FALSE) WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ CTE Scan on managertree mt
+   Filter: (NOT (SubPlan 2))
+   CTE managertree
+     ->  Recursive Union
+           ->  Seq Scan on employees
+                 Filter: (id = 2)
+           ->  Hash Join
+                 Hash Cond: (e.manager_id = mtree.id)
+                 ->  Seq Scan on employees e
+                 ->  Hash
+                       ->  WorkTable Scan on managertree mtree
+   SubPlan 2
+     ->  CTE Scan on managertree
+           Filter: ((name)::text <> (mt.name)::text)
+(14 rows)
+
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+ id | name | manager_id 
+----+------+------------
+  2 | John |          1
+(1 row)
+
+set enable_correlated_any_transform = off;
+EXPLAIN (COSTS FALSE) WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ CTE Scan on managertree mt
+   Filter: (NOT (SubPlan 2))
+   CTE managertree
+     ->  Recursive Union
+           ->  Seq Scan on employees
+                 Filter: (id = 2)
+           ->  Hash Join
+                 Hash Cond: (e.manager_id = mtree.id)
+                 ->  Seq Scan on employees e
+                 ->  Hash
+                       ->  WorkTable Scan on managertree mtree
+   SubPlan 2
+     ->  CTE Scan on managertree
+           Filter: ((name)::text <> (mt.name)::text)
+(14 rows)
+
+--result should match previous result
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+ id | name | manager_id 
+----+------+------------
+  2 | John |          1
+(1 row)
+
+reset enable_correlated_any_transform;
+--correlated var in select list, disallow transform
+explain (costs off) select count(*) from s where (s.n, s.u) in (select l.n, s.nn from l where l.u != s.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+select count(*) from s where (s.n, s.u) in (select l.n, s.nn from l where l.u != s.u);
+ count 
+-------
+     0
+(1 row)
+
+--correlated var in select list and buried in an expression, disallow transform
+explain (costs off) select count(*) from s where s.n in (select l.n * s.nn from l where l.u != s.u);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+select count(*) from s where s.n in (select l.n * s.nn from l where l.u != s.u);
+ count 
+-------
+     2
+(1 row)
+
+--correlated var in join ... on (...), disallow transform
+explain (costs off)  select count(*) from s s1 where s1.n in (select l.n from l left join s s2 on s2.nn*s1.nn = l.nn where s1.u != l.u);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   ->  Seq Scan on s s1
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Hash Left Join
+                 Hash Cond: (l.nn = (s2.nn * s1.nn))
+                 ->  Seq Scan on l
+                       Filter: (s1.u <> u)
+                 ->  Hash
+                       ->  Seq Scan on s s2
+(10 rows)
+
+select count(*) from s s1 where s1.n in (select l.n from l left join s s2 on s2.nn*s1.nn = l.nn where s1.u != l.u);
+ count 
+-------
+     0
+(1 row)
+
+--correlated var in order by, disallow transform
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn);
+             QUERY PLAN             
+------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on l
+                 Filter: (u <> s.u)
+(6 rows)
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn);
+ count 
+-------
+     0
+(1 row)
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn - l.nn);
+                QUERY PLAN                 
+-------------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Sort
+                 Sort Key: ((s.nn - l.nn))
+                 ->  Seq Scan on l
+                       Filter: (u <> s.u)
+(8 rows)
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn - l.nn);
+ count 
+-------
+     0
+(1 row)
+
+--correlated var in group by, disallow transfer since correlated var must also be in the select list
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Group
+                 Group Key: l.n, s.nn
+                 ->  Index Scan using l_n on l
+                       Filter: (u <> s.u)
+(8 rows)
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn);
+ count 
+-------
+     0
+(1 row)
+
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn - l.nn);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  HashAggregate
+                 Group Key: l.n, (s.nn - l.nn)
+                 ->  Seq Scan on l
+                       Filter: (u <> s.u)
+(8 rows)
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn - l.nn);
+ count 
+-------
+     0
+(1 row)
+
+--correlated var in having clause, disallow transfer sinnce correlated var must also be in the select list
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n having s.nn > 3);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Aggregate
+   ->  Seq Scan on s
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Group
+                 Group Key: l.n
+                 ->  Result
+                       One-Time Filter: (s.nn > 3)
+                       ->  Index Scan using l_n on l
+                             Filter: (u <> s.u)
+(10 rows)
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n having s.nn > 3);
+ count 
+-------
+     0
+(1 row)
+
+-- clean up
+drop table s;
+drop table s1;
+drop table l;
+drop table empty;
+drop table employees;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 715842b87a..2caf87a565 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -70,28 +70,29 @@ select count(*) >= 0 as ok from pg_prepared_xacts;
 -- This is to record the prevailing planner enable_foo settings during
 -- a regression test run.
 select name, setting from pg_settings where name like 'enable%';
-              name              | setting 
---------------------------------+---------
- enable_bitmapscan              | on
- enable_gathermerge             | on
- enable_groupingsets_hash_disk  | off
- enable_hashagg                 | on
- enable_hashagg_disk            | on
- enable_hashjoin                | on
- enable_indexonlyscan           | on
- enable_indexscan               | on
- enable_material                | on
- enable_mergejoin               | on
- enable_nestloop                | on
- enable_parallel_append         | on
- enable_parallel_hash           | on
- enable_partition_pruning       | on
- enable_partitionwise_aggregate | off
- enable_partitionwise_join      | off
- enable_seqscan                 | on
- enable_sort                    | on
- enable_tidscan                 | on
-(19 rows)
+              name               | setting 
+---------------------------------+---------
+ enable_bitmapscan               | on
+ enable_correlated_any_transform | on
+ enable_gathermerge              | on
+ enable_groupingsets_hash_disk   | off
+ enable_hashagg                  | on
+ enable_hashagg_disk             | on
+ enable_hashjoin                 | on
+ enable_indexonlyscan            | on
+ enable_indexscan                | on
+ enable_material                 | on
+ enable_mergejoin                | on
+ enable_nestloop                 | on
+ enable_parallel_append          | on
+ enable_parallel_hash            | on
+ enable_partition_pruning        | on
+ enable_partitionwise_aggregate  | off
+ enable_partitionwise_join       | off
+ enable_seqscan                  | on
+ enable_sort                     | on
+ enable_tidscan                  | on
+(20 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 893d8d0f62..c0e75603d9 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -831,3 +831,333 @@ select * from (with x as (select 2 as y) select * from x) ss;
 explain (verbose, costs off)
 with x as (select * from subselect_tbl)
 select * from x for update;
+
+CREATE TABLE s (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into s (u, n, nn, p)
+    select
+    generate_series(1,3) as u,
+    generate_series(1,3) as n,
+    generate_series(1,3) as nn,
+    'foo' as p;
+insert into s values(1000002, 1000002, 1000002, 'foofoo');
+UPDATE s set n = NULL WHERE n = 3;
+analyze s;
+
+CREATE TABLE l (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into l (u, n, nn, p)
+    select
+    generate_series(1,10000 ) as u,
+    generate_series(1,10000 ) as n,
+    generate_series(1,10000 ) as nn,
+    'bar' as p;
+UPDATE l set n = NULL WHERE n = 7;
+
+CREATE TABLE s1 (u INTEGER NOT NULL, n INTEGER NULL, n1 INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+insert into s1 (u, n, n1, nn, p)
+    select
+    generate_series(1,3) as u,
+    generate_series(1,3) as n,
+    generate_series(1,3) as n1,
+    generate_series(1,3) as nn,
+    'foo' as p;
+insert into s1 values(1000003, 1000003, 1000003, 1000003, 'foofoo');
+insert into s1 values(1003, 1003, 1003, 1003, 'foofoo');
+UPDATE s1 set n = NULL WHERE n = 3;
+UPDATE s1 set n1 = NULL WHERE n = 2;
+UPDATE s1 set n1 = NULL WHERE n1 = 3;
+analyze s1;
+
+CREATE UNIQUE INDEX l_u ON l (u);
+CREATE INDEX l_n ON l (n);
+CREATE INDEX l_nn ON l (nn);
+analyze l;
+
+CREATE TABLE empty (u INTEGER NOT NULL, n INTEGER NULL, nn INTEGER NOT NULL, p VARCHAR(128) NULL);
+analyze empty;
+
+-- Recursive CTE
+CREATE TABLE employees (
+  id serial,
+  name varchar(255),
+  manager_id int
+);
+
+INSERT INTO employees VALUES (1, 'Mark', null);
+INSERT INTO employees VALUES (2, 'John', 1);
+INSERT INTO employees VALUES (3, 'Dan', 2);
+INSERT INTO employees VALUES (4, 'Clark', 1);
+INSERT INTO employees VALUES (5, 'Linda', 2);
+INSERT INTO employees VALUES (6, 'Willy', 2);
+INSERT INTO employees VALUES (7, 'Barack', 2);
+INSERT INTO employees VALUES (8, 'Elen', 2);
+INSERT INTO employees VALUES (9, 'Kate', 3);
+INSERT INTO employees VALUES (10, 'Terry', 4);
+
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree;
+
+--test corrrelated IN subquery
+explain (costs off) select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+
+select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+
+--result should match previous result
+select count(*) from s where s.u in (select l.u from l where l.n = s.n);
+
+reset enable_correlated_any_transform;
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+
+--result should match previous result
+select count(*) from s where s.n in (select l.n from l where l.u != s.u);
+
+reset enable_correlated_any_transform;
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+
+select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+
+--result should match previous result
+select count(*) from s where s.n in (select l.n from l where l.u > s.u);
+
+reset enable_correlated_any_transform;
+
+--test corrrelated NOT IN subquery
+explain (costs off) select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+
+select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+
+--result should match previous result
+select count(*) from s where s.u not in (select l.u from l where l.n = s.n);
+
+reset enable_correlated_any_transform;
+
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+
+select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+
+--result should match previous result
+select count(*) from s where s.n not in (select l.n from l where l.u != s.u);
+
+reset enable_correlated_any_transform;
+
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+
+select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+
+--result should match previous result
+select count(*) from s where s.n not in (select l.n from l where l.u > s.u);
+
+reset enable_correlated_any_transform;
+
+--correlated empty subquery
+explain (costs off) select count(*) from l where n not in (select n from empty where u != l.u);
+
+select count(*) from l where n not in (select n from empty where u != l.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select count(*) from l where n not in (select n from empty where u != l.u);
+
+select count(*) from l where n not in (select n from empty where u != l.u);
+
+reset enable_correlated_any_transform;
+
+--nested correlated in, correlated var is two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u));
+
+reset enable_correlated_any_transform;
+
+--nested correlated in, correlated var is one level deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+
+select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+
+select * from s where n in (select n from s1 where n in (select n from l) and u != s.u);
+
+reset enable_correlated_any_transform;
+
+--nested correlated in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n in (select n from s1 where n in (select n from l where u != s.u) and u != s.u);
+
+reset enable_correlated_any_transform;
+
+--nested correlated not in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n not in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+reset enable_correlated_any_transform;
+
+--nested correlated in and not in, correlated var is both one level and two levels deep
+explain (costs off) select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+set enable_correlated_any_transform = off;
+
+explain (costs off) select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+select * from s where n in (select n from s1 where n not in (select n from l where u != s.u) and u != s.u);
+
+reset enable_correlated_any_transform;
+
+--recursive cte & correlated not in
+EXPLAIN (COSTS FALSE) WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+
+set enable_correlated_any_transform = off;
+
+EXPLAIN (COSTS FALSE) WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+
+--result should match previous result
+WITH RECURSIVE managertree AS (
+  SELECT id, name, manager_id
+  FROM employees
+  WHERE id = 2
+  UNION ALL
+  SELECT e.id, e.name, e.manager_id
+  FROM employees e
+  INNER JOIN managertree mtree ON mtree.id = e.manager_id
+)
+SELECT *
+FROM managertree mt WHERE mt.manager_id NOT IN (SELECT id FROM managertree where managertree.name != mt.name);
+
+reset enable_correlated_any_transform;
+
+--correlated var in select list, disallow transform
+explain (costs off) select count(*) from s where (s.n, s.u) in (select l.n, s.nn from l where l.u != s.u);
+
+select count(*) from s where (s.n, s.u) in (select l.n, s.nn from l where l.u != s.u);
+
+--correlated var in select list and buried in an expression, disallow transform
+explain (costs off) select count(*) from s where s.n in (select l.n * s.nn from l where l.u != s.u);
+
+select count(*) from s where s.n in (select l.n * s.nn from l where l.u != s.u);
+
+--correlated var in join ... on (...), disallow transform
+explain (costs off)  select count(*) from s s1 where s1.n in (select l.n from l left join s s2 on s2.nn*s1.nn = l.nn where s1.u != l.u);
+
+select count(*) from s s1 where s1.n in (select l.n from l left join s s2 on s2.nn*s1.nn = l.nn where s1.u != l.u);
+
+--correlated var in order by, disallow transform
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn);
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn);
+
+explain (costs off) select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn - l.nn);
+
+select count(*) from s where s.n in (select l.n from l where l.u != s.u order by s.nn - l.nn);
+
+--correlated var in group by, disallow transfer since correlated var must also be in the select list
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn);
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn);
+
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn - l.nn);
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n, s.nn - l.nn);
+
+--correlated var in having clause, disallow transfer sinnce correlated var must also be in the select list
+explain (costs off) select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n having s.nn > 3);
+
+select count(*) from s where (s.n, s.nn) in (select l.n, s.nn from l where l.u != s.u group by l.n having s.nn > 3);
+
+-- clean up
+drop table s;
+drop table s1;
+drop table l;
+drop table empty;
+drop table employees;
#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Li, Zheng (#1)
Re: Correlated IN/Any Subquery Transformation

"Li, Zheng" <zhelli@amazon.com> writes:

This patch enables correlated IN/Any subquery to be transformed to join, the transformation is allowed only when the correlated Var is in the where clause of the subquery. It covers the most common correlated cases and follows the same criteria that is followed by the correlated Exists transformation code.

It's too late to include this in v13, but please add the patch to the
next commitfest so that we remember to consider it for v14.

https://commitfest.postgresql.org

regards, tom lane