Is it worth pushing conditions to sublink/subplan?
Hi Hackers,
Recently, an issue has been bothering me. It is about condition push-down in SQL.
I use cases from regression testing as an example.
I found that the condition (b = 1) can be pushed down into a subquery; however, it cannot be pushed down into a sublink/subplan.
If a sublink/subplan references a partitioned table, having those conditions available would be useful for partition pruning.
So, is it worth pushing conditions to sublink/subplan?
Anybody have any ideas?
regards,
Wenjing
example:
create table p (a int, b int, c int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);
create table q (a int, b int, c int) partition by list (a);
create table q1 partition of q for values in (1) partition by list (b);
create table q11 partition of q1 for values in (1) partition by list (c);
create table q111 partition of q11 for values in (1);
create table q2 partition of q for values in (2) partition by list (b);
create table q21 partition of q2 for values in (1);
create table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
postgres-# explain (costs off)
postgres-# select temp.b from
postgres-# (
postgres(# select a,b from ab x where x.a = 1
postgres(# union all
postgres(# (values(1,1))
postgres(# ) temp,
postgres-# ab y
postgres-# where y.b = temp.b and y.a = 1 and y.b=1;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on ab_a1_b1 y
Filter: ((b = 1) AND (a = 1))
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Seq Scan on ab_a1_b1 x
Filter: ((a = 1) AND (b = 1))
-> Result
(8 rows)
The conditions (B =1) can be pushed down into the subquery.
postgres=# explain (costs off)
postgres-# select
postgres-# y.a,
postgres-# (Select x.b from ab x where y.a =x.a and y.b=x.b) as b
postgres-# from ab y where a = 1 and b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (B = 1 and A = 1) cannot be pushed down to sublink/subplan in targetlist.
postgres=# explain (costs off)
postgres-# select y.a
postgres-# from ab y
postgres-# where
postgres-# (select x.a > x.b from ab x where y.a =x.a and y.b=x.b) and
postgres-# y.a = 1 and y.b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1) AND (SubPlan 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (B=1 and A=1) cannot be pushed down to sublink/subplan in where clause.
Attachments:
2021年8月16日 17:15,Wenjing <wenjing.zwj@alibaba-inc.com> 写道:
Hi Hackers,
Recently, a issue has been bothering me, This is about conditional push-down in SQL.
I use cases from regression testing as an example.
I found that the conditions (B =1) can be pushed down into the subquery, However, it cannot be pushed down to sublink/subplan.
If a sublink/subplan clause contains a partition table, it can be useful to get the conditions for pruning.
So, is it worth pushing conditions to sublink/subplan?
Anybody have any ideas?
regards,
Wenjing
example:
create table p (a int, b int, c int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);
create table q (a int, b int, c int) partition by list (a);
create table q1 partition of q for values in (1) partition by list (b);
create table q11 partition of q1 for values in (1) partition by list (c);
create table q111 partition of q11 for values in (1);
create table q2 partition of q for values in (2) partition by list (b);
create table q21 partition of q2 for values in (1);
create table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
Sorry, I messed up the structure of the table.
It should be:
create table ab (a int not null, b int not null) partition by list (a);
create table ab_a2 partition of ab for values in(2) partition by list (b);
create table ab_a2_b1 partition of ab_a2 for values in (1);
create table ab_a2_b2 partition of ab_a2 for values in (2);
create table ab_a2_b3 partition of ab_a2 for values in (3);
create table ab_a1 partition of ab for values in(1) partition by list (b);
create table ab_a1_b1 partition of ab_a1 for values in (1);
create table ab_a1_b2 partition of ab_a1 for values in (2);
create table ab_a1_b3 partition of ab_a1 for values in (3);
create table ab_a3 partition of ab for values in(3) partition by list (b);
create table ab_a3_b1 partition of ab_a3 for values in (1);
create table ab_a3_b2 partition of ab_a3 for values in (2);
create table ab_a3_b3 partition of ab_a3 for values in (3);
Show quoted text
postgres-# explain (costs off)
postgres-# select temp.b from
postgres-# (
postgres(# select a,b from ab x where x.a = 1
postgres(# union all
postgres(# (values(1,1))
postgres(# ) temp,
postgres-# ab y
postgres-# where y.b = temp.b and y.a = 1 and y.b=1;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on ab_a1_b1 y
Filter: ((b = 1) AND (a = 1))
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Seq Scan on ab_a1_b1 x
Filter: ((a = 1) AND (b = 1))
-> Result
(8 rows)
The condition (b = 1) can be pushed down into the subquery.
postgres=# explain (costs off)
postgres-# select
postgres-# y.a,
postgres-# (Select x.b from ab x where y.a =x.a and y.b=x.b) as b
postgres-# from ab y where a = 1 and b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (b = 1 and a = 1) cannot be pushed down into a sublink/subplan in the targetlist.
postgres=# explain (costs off)
postgres-# select y.a
postgres-# from ab y
postgres-# where
postgres-# (select x.a > x.b from ab x where y.a =x.a and y.b=x.b) and
postgres-# y.a = 1 and y.b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1) AND (SubPlan 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (b = 1 and a = 1) cannot be pushed down into a sublink/subplan in the WHERE clause.
Attachments:
Indeed, this may be useful for partition pruning.
I am also curious about why this has not been achieved.
Wenjing <wenjing.zwj@alibaba-inc.com> 于2021年8月23日周一 上午10:46写道:
Show quoted text
Hi Hackers,
Recently, a issue has been bothering me, This is about conditional
push-down in SQL.
I use cases from regression testing as an example.
I found that the conditions (B =1) can be pushed down into the
subquery, However, it cannot be pushed down to sublink/subplan.
If a sublink/subplan clause contains a partition table, it can be useful
to get the conditions for pruning.
So, is it worth pushing conditions to sublink/subplan?
Anybody have any ideas?regards,
Wenjingexample:
create table p (a int, b int, c int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);
create table q (a int, b int, c int) partition by list (a);
create table q1 partition of q for values in (1) partition by list (b);
create table q11 partition of q1 for values in (1) partition by list (c);
create table q111 partition of q11 for values in (1);
create table q2 partition of q for values in (2) partition by list (b);
create table q21 partition of q2 for values in (1);
create table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
postgres-# explain (costs off)
postgres-# select temp.b from
postgres-# (
postgres(# select a,b from ab x where x.a = 1
postgres(# union all
postgres(# (values(1,1))
postgres(# ) temp,
postgres-# ab y
postgres-# where y.b = temp.b and y.a = 1 and y.b=1;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on ab_a1_b1 y
Filter: ((b = 1) AND (a = 1))
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Seq Scan on ab_a1_b1 x
Filter: ((a = 1) AND (b = 1))
-> Result
(8 rows)The conditions (B =1) can be pushed down into the subquery.
postgres=# explain (costs off)
postgres-# select
postgres-# y.a,
postgres-# (Select x.b from ab x where y.a =x.a and y.b=x.b) as b
postgres-# from ab y where a = 1 and b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)The conditions (B = 1 and A = 1) cannot be pushed down to sublink/subplan
in targetlist.postgres=# explain (costs off)
postgres-# select y.a
postgres-# from ab y
postgres-# where
postgres-# (select x.a > x.b from ab x where y.a =x.a and y.b=x.b) and
postgres-# y.a = 1 and y.b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1) AND (SubPlan 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)The conditions (B=1 and A=1) cannot be pushed down to sublink/subplan in
where clause.
I tested it the way you described, with a larger number of partitions.
I created a hash-partitioned table with 1000 partitions.
With your first query, the optimizer prunes the unnecessary
partitions well.
Here is the plan:
postgres=# explain analyze
postgres-# select temp.p1 from
postgres-# (
postgres(# select p1,p2 from test1.test1hashtable x where x.p1 = '1'
postgres(# union all
postgres(# (values('1','1'))
postgres(# ) temp,
postgres-# test1.test1hashtable y
postgres-# where y.p2 = temp.p2 and y.p1 = '1' and y.p1='1';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..25.55 rows=1 width=32) (actual time=0.004..0.004
rows=0 loops=1)
Join Filter: (x.p2 = y.p2)
-> Seq Scan on test1hashtable826 y (cost=0.00..12.75 rows=1 width=32)
(actual time=0.002..0.002 rows=0 loops=1)
Filter: (p1 = '1'::text)
-> Append (cost=0.00..12.78 rows=2 width=64) (never executed)
-> Seq Scan on test1hashtable826 x (cost=0.00..12.75 rows=1
width=64) (never executed)
Filter: (p1 = '1'::text)
-> Result (cost=0.00..0.01 rows=1 width=64) (never executed)
Planning Time: 0.158 ms
Execution Time: 0.022 ms
(10 rows)
But when the second one runs, the planning time reaches 13.942ms.
The plan:
postgres=# explain analyze
postgres-# select
postgres-# y.p1,
postgres-# (Select x.p2 from test1.test1hashtable x where y.p1 =x.p1 and
y.p2=x.p2) as b
postgres-# from test1.test1hashtable y where p1 = '1' and p2 = '1';
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Seq Scan on test1hashtable826 y (cost=0.00..13318.30 rows=1 width=64)
(actual time=0.004..0.047 rows=0 loops=1)
Filter: ((p1 = '1'::text) AND (p2 = '1'::text))
SubPlan 1
-> Append (cost=0.00..13305.00 rows=1000 width=32) (never executed)
-> Seq Scan on test1hashtable1 x_1 (cost=0.00..13.30 rows=1
width=32) (never executed)
Filter: ((y.p1 = p1) AND (y.p2 = p2))
-> Seq Scan on test1hashtable1000 x_1000 (cost=0.00..13.30
rows=1 width=32) (never executed)
Filter: ((y.p1 = p1) AND (y.p2 = p2))
Planning Time: 13.942 ms
Execution Time: 4.899 ms
(2006 rows)
This is a very worthwhile thing to do. In a relatively large business
system, a large number of partition tables and high concurrency are often
used. If the planning time is too long, this will greatly affect the
business.
regards,
Shawn.
Wenjing <wenjing.zwj@alibaba-inc.com> 于2021年8月17日周二 上午10:31写道:
Show quoted text
2021年8月16日 17:15,Wenjing <wenjing.zwj@alibaba-inc.com> 写道:
Hi Hackers,
Recently, a issue has been bothering me, This is about conditional
push-down in SQL.
I use cases from regression testing as an example.
I found that the conditions (B =1) can be pushed down into the
subquery, However, it cannot be pushed down to sublink/subplan.
If a sublink/subplan clause contains a partition table, it can be useful
to get the conditions for pruning.
So, is it worth pushing conditions to sublink/subplan?
Anybody have any ideas?regards,
Wenjingexample:
create table p (a int, b int, c int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);
create table q (a int, b int, c int) partition by list (a);
create table q1 partition of q for values in (1) partition by list (b);
create table q11 partition of q1 for values in (1) partition by list (c);
create table q111 partition of q11 for values in (1);
create table q2 partition of q for values in (2) partition by list (b);
create table q21 partition of q2 for values in (1);
create table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);Sorry, I messed up the structure of the table.
It is should be:
create table ab (a int not null, b int not null) partition by list (a);
create table ab_a2 partition of ab for values in(2) partition by list (b);
create table ab_a2_b1 partition of ab_a2 for values in (1);
create table ab_a2_b2 partition of ab_a2 for values in (2);
create table ab_a2_b3 partition of ab_a2 for values in (3);
create table ab_a1 partition of ab for values in(1) partition by list (b);
create table ab_a1_b1 partition of ab_a1 for values in (1);
create table ab_a1_b2 partition of ab_a1 for values in (2);
create table ab_a1_b3 partition of ab_a1 for values in (3);
create table ab_a3 partition of ab for values in(3) partition by list (b);
create table ab_a3_b1 partition of ab_a3 for values in (1);
create table ab_a3_b2 partition of ab_a3 for values in (2);
create table ab_a3_b3 partition of ab_a3 for values in (3);postgres-# explain (costs off)
postgres-# select temp.b from
postgres-# (
postgres(# select a,b from ab x where x.a = 1
postgres(# union all
postgres(# (values(1,1))
postgres(# ) temp,
postgres-# ab y
postgres-# where y.b = temp.b and y.a = 1 and y.b=1;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on ab_a1_b1 y
Filter: ((b = 1) AND (a = 1))
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Seq Scan on ab_a1_b1 x
Filter: ((a = 1) AND (b = 1))
-> Result
(8 rows)The conditions (B =1) can be pushed down into the subquery.
postgres=# explain (costs off)
postgres-# select
postgres-# y.a,
postgres-# (Select x.b from ab x where y.a =x.a and y.b=x.b) as b
postgres-# from ab y where a = 1 and b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)The conditions (B = 1 and A = 1) cannot be pushed down to sublink/subplan
in targetlist.postgres=# explain (costs off)
postgres-# select y.a
postgres-# from ab y
postgres-# where
postgres-# (select x.a > x.b from ab x where y.a =x.a and y.b=x.b) and
postgres-# y.a = 1 and y.b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1) AND (SubPlan 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)The conditions (B=1 and A=1) cannot be pushed down to sublink/subplan in
where clause.
Hi Hackers
For my previous proposal, I developed a prototype that passes the regression tests, apart from the two output differences described below.
It works similarly to qual pushdown into subqueries. Sublinks are expanded
at the beginning of planning for each query level. At that stage, the query's conditions and
equivalence classes have not been processed yet; they are processed only by
generate_base_implied_equalities, which is why quals can be pushed down into a subquery but not into a sublink.
My POC implementation delays the expansion of sublinks in the SELECT clause (targetList)
and the WHERE clause until after generate_base_implied_equalities. Thus,
the equivalence conditions already established in the upper-level query can be easily obtained
during sublink expansion (make_subplan).
For example, if the upper-level query has a.id = 10 and the sublink query has a.id = b.id, then
we derive b.id = 10 and push it down into the sublink query. If b is a partitioned table
partitioned by id, a large number of unrelated partitions are pruned.
This saves a significant amount of planning and execution time, especially
if the partitioned table has a large number of partitions, which is the case I care about.
Currently, two queries in the regression tests produce different output because the expansion
order of sublinks changed; this does not affect the query results.
Look forward to your suggestions on this proposal.
Thanks
Wenjing
------------------原始邮件 ------------------
发件人:shawn wang <shawn.wang.pg@gmail.com>
发送时间:Wed Sep 1 10:54:50 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>
抄送:PostgreSQL Hackers <pgsql-hackers@postgresql.org>, wjzeng <wjzeng2012@gmail.com>
主题:Re: Is it worth pushing conditions to sublink/subplan?
I tested it the way you said and increased the number of sub-tables.
I created a hash partition table of 1000 sub-tables.
Test according to your first SQL, the optimizer cuts the unnecessary sub-tables well.
You can see the plan:
postgres=# explain analyze
postgres-# select temp.p1 from
postgres-# (
postgres(# select p1,p2 from test1.test1hashtable x where x.p1 = '1'
postgres(# union all
postgres(# (values('1','1'))
postgres(# ) temp,
postgres-# test1.test1hashtable y
postgres-# where y.p2 = temp.p2 and y.p1 = '1' and y.p1='1';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..25.55 rows=1 width=32) (actual time=0.004..0.004 rows=0 loops=1)
Join Filter: (x.p2 = y.p2)
-> Seq Scan on test1hashtable826 y (cost=0.00..12.75 rows=1 width=32) (actual time=0.002..0.002 rows=0 loops=1)
Filter: (p1 = '1'::text)
-> Append (cost=0.00..12.78 rows=2 width=64) (never executed)
-> Seq Scan on test1hashtable826 x (cost=0.00..12.75 rows=1 width=64) (never executed)
Filter: (p1 = '1'::text)
-> Result (cost=0.00..0.01 rows=1 width=64) (never executed)
Planning Time: 0.158 ms
Execution Time: 0.022 ms
(10 rows)
But when the second one runs, the planning time reaches 13.942ms.
The plan:
postgres=# explain analyze
postgres-# select
postgres-# y.p1,
postgres-# (Select x.p2 from test1.test1hashtable x where y.p1 =x.p1 and y.p2=x.p2) as b
postgres-# from test1.test1hashtable y where p1 = '1' and p2 = '1';
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Seq Scan on test1hashtable826 y (cost=0.00..13318.30 rows=1 width=64) (actual time=0.004..0.047 rows=0 loops=1)
Filter: ((p1 = '1'::text) AND (p2 = '1'::text))
SubPlan 1
-> Append (cost=0.00..13305.00 rows=1000 width=32) (never executed)
-> Seq Scan on test1hashtable1 x_1 (cost=0.00..13.30 rows=1 width=32) (never executed)
Filter: ((y.p1 = p1) AND (y.p2 = p2))
-> Seq Scan on test1hashtable1000 x_1000 (cost=0.00..13.30 rows=1 width=32) (never executed)
Filter: ((y.p1 = p1) AND (y.p2 = p2))
Planning Time: 13.942 ms
Execution Time: 4.899 ms
(2006 rows)
This is a very worthwhile thing to do. In a relatively large business system, a large number of partition tables and high concurrency are often used. If the planning time is too long, this will greatly affect the business.
regards,
Shawn.
Wenjing <wenjing.zwj@alibaba-inc.com> 于2021年8月17日周二 上午10:31写道:
2021年8月16日 17:15,Wenjing <wenjing.zwj@alibaba-inc.com> 写道:
Hi Hackers,
Recently, a issue has been bothering me, This is about conditional push-down in SQL.
I use cases from regression testing as an example.
I found that the conditions (B =1) can be pushed down into the subquery, However, it cannot be pushed down to sublink/subplan.
If a sublink/subplan clause contains a partition table, it can be useful to get the conditions for pruning.
So, is it worth pushing conditions to sublink/subplan?
Anybody have any ideas?
regards,
Wenjing
example:
create table p (a int, b int, c int) partition by list (a);
create table p1 partition of p for values in (1);
create table p2 partition of p for values in (2);
create table q (a int, b int, c int) partition by list (a);
create table q1 partition of q for values in (1) partition by list (b);
create table q11 partition of q1 for values in (1) partition by list (c);
create table q111 partition of q11 for values in (1);
create table q2 partition of q for values in (2) partition by list (b);
create table q21 partition of q2 for values in (1);
create table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
Sorry, I messed up the structure of the table.
It is should be:
create table ab (a int not null, b int not null) partition by list (a);
create table ab_a2 partition of ab for values in(2) partition by list (b);
create table ab_a2_b1 partition of ab_a2 for values in (1);
create table ab_a2_b2 partition of ab_a2 for values in (2);
create table ab_a2_b3 partition of ab_a2 for values in (3);
create table ab_a1 partition of ab for values in(1) partition by list (b);
create table ab_a1_b1 partition of ab_a1 for values in (1);
create table ab_a1_b2 partition of ab_a1 for values in (2);
create table ab_a1_b3 partition of ab_a1 for values in (3);
create table ab_a3 partition of ab for values in(3) partition by list (b);
create table ab_a3_b1 partition of ab_a3 for values in (1);
create table ab_a3_b2 partition of ab_a3 for values in (2);
create table ab_a3_b3 partition of ab_a3 for values in (3);
postgres-# explain (costs off)
postgres-# select temp.b from
postgres-# (
postgres(# select a,b from ab x where x.a = 1
postgres(# union all
postgres(# (values(1,1))
postgres(# ) temp,
postgres-# ab y
postgres-# where y.b = temp.b and y.a = 1 and y.b=1;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on ab_a1_b1 y
Filter: ((b = 1) AND (a = 1))
-> Append
-> Subquery Scan on "*SELECT* 1"
-> Seq Scan on ab_a1_b1 x
Filter: ((a = 1) AND (b = 1))
-> Result
(8 rows)
The conditions (B =1) can be pushed down into the subquery.
postgres=# explain (costs off)
postgres-# select
postgres-# y.a,
postgres-# (Select x.b from ab x where y.a =x.a and y.b=x.b) as b
postgres-# from ab y where a = 1 and b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (B = 1 and A = 1) cannot be pushed down to sublink/subplan in targetlist.
postgres=# explain (costs off)
postgres-# select y.a
postgres-# from ab y
postgres-# where
postgres-# (select x.a > x.b from ab x where y.a =x.a and y.b=x.b) and
postgres-# y.a = 1 and y.b = 1;
QUERY PLAN
---------------------------------------------------
Seq Scan on ab_a1_b1 y
Filter: ((a = 1) AND (b = 1) AND (SubPlan 1))
SubPlan 1
-> Append
-> Seq Scan on ab_a1_b1 x_1
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b2 x_2
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a1_b3 x_3
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b1 x_4
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b2 x_5
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a2_b3 x_6
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b1 x_7
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b2 x_8
Filter: ((y.a = a) AND (y.b = b))
-> Seq Scan on ab_a3_b3 x_9
Filter: ((y.a = a) AND (y.b = b))
(22 rows)
The conditions (B=1 and A=1) cannot be pushed down to sublink/subplan in where clause.
Attachments:
0001-poc-pushdown-qual-to-sublink.patch (application/octet-stream) [Download]
From 1579f75d946f55a7bdbcc55e719332623bfecbad Mon Sep 17 00:00:00 2001
From: "wenjing.zwj" <wenjing.zwj@alibaba-inc.com>
Date: Thu, 15 Jul 2021 19:20:26 +0800
Subject: [PATCH] poc pushdown qual to sublink
---
src/backend/optimizer/path/allpaths.c | 53 +++++
src/backend/optimizer/path/equivclass.c | 10 +-
src/backend/optimizer/plan/initsplan.c | 198 ++++++++++++++++++
src/backend/optimizer/plan/planagg.c | 23 +-
src/backend/optimizer/plan/planmain.c | 12 +-
src/backend/optimizer/plan/planner.c | 49 +++--
src/backend/optimizer/plan/subselect.c | 267 +++++++++++++++++++++++-
src/backend/utils/adt/ruleutils.c | 3 +-
src/backend/utils/misc/guc.c | 13 ++
src/include/nodes/pathnodes.h | 4 +
src/include/optimizer/paths.h | 8 +-
src/include/optimizer/planmain.h | 12 +-
src/include/optimizer/planner.h | 1 +
src/include/optimizer/subselect.h | 4 +-
src/include/utils/guc.h | 2 +
src/include/utils/ruleutils.h | 1 +
src/test/regress/expected/join_hash.out | 6 +-
src/test/regress/expected/subselect.out | 2 +-
18 files changed, 620 insertions(+), 48 deletions(-)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 296dd75c1b6..3ab6cb15c07 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -40,8 +40,10 @@
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
+#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
@@ -3895,6 +3897,57 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
list_free(live_children);
}
+bool
+try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions)
+{
+ pushdown_safety_info safetyInfo;
+ ListCell *lc1;
+ bool found = false;
+ bool query_is_pushdown_safe = false;
+
+ if (conditions == NIL)
+ return false;
+
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeColumns = (bool *)
+ palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+
+ query_is_pushdown_safe = subquery_is_pushdown_safe(subquery, subquery, &safetyInfo);
+ pfree(safetyInfo.unsafeColumns);
+ if (!query_is_pushdown_safe)
+ return false;
+
+ foreach(lc1, conditions)
+ {
+ pushdown_expr_info *expr_info = (pushdown_expr_info *) lfirst(lc1);
+ Index levelsup = 0;
+ RelOptInfo *rel;
+ ListCell *lc2;
+ PlannerInfo *tmproot = parent;
+
+ for (levelsup = expr_info->outer->varlevelsup - 1; levelsup > 0; levelsup--)
+ tmproot = tmproot->parent_root;
+
+ expr_info->outer->varlevelsup = 0;
+ rel = find_base_rel(tmproot, expr_info->outer->varno);
+ if (rel == NULL || rel->baserestrictinfo == NULL)
+ continue;
+
+ foreach(lc2, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+ if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+ {
+ /* replace restrict expr from outer var = const to inner var = const and push down to subquery */
+ sublink_query_push_qual(subquery, (Node *)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
+ found = true;
+ }
+ }
+ }
+
+ return found;
+}
/*****************************************************************************
* DEBUG SUPPORT
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d6..ae27d1cbca4 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -388,6 +388,7 @@ process_equivalence(PlannerInfo *root,
restrictinfo->security_level);
ec1->ec_max_security = Max(ec1->ec_max_security,
restrictinfo->security_level);
+ ec1->ec_processed = false;
/* mark the RI as associated with this eclass */
restrictinfo->left_ec = ec1;
restrictinfo->right_ec = ec1;
@@ -450,6 +451,7 @@ process_equivalence(PlannerInfo *root,
ec->ec_min_security = restrictinfo->security_level;
ec->ec_max_security = restrictinfo->security_level;
ec->ec_merged = NULL;
+ ec->ec_processed = false;
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
@@ -574,6 +576,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
ec->ec_relids = bms_add_members(ec->ec_relids, relids);
}
ec->ec_members = lappend(ec->ec_members, em);
+ ec->ec_processed = false;
return em;
}
@@ -711,6 +714,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_min_security = UINT_MAX;
newec->ec_max_security = 0;
newec->ec_merged = NULL;
+ newec->ec_processed = false;
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
@@ -1116,11 +1120,15 @@ generate_base_implied_equalities(PlannerInfo *root)
*/
if (list_length(ec->ec_members) > 1)
{
- if (ec->ec_has_const)
+ if (ec->ec_processed)
+ ;
+ else if (ec->ec_has_const)
generate_base_implied_equalities_const(root, ec);
else
generate_base_implied_equalities_no_const(root, ec);
+ ec->ec_processed = true;
+
/* Recover if we failed to generate required derived clauses */
if (ec->ec_broken)
generate_base_implied_equalities_broken(root, ec);
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900f..371874f0246 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -30,10 +30,12 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
#include "parser/analyze.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
+#include "utils/guc.h"
/* These parameters are set by GUC */
int from_collapse_limit;
@@ -80,6 +82,16 @@ static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
+static void remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list);
+static void *search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node);
/*****************************************************************************
*
@@ -1621,6 +1633,17 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids nullable_relids;
RestrictInfo *restrictinfo;
+ /* A clause that still contains an unexpanded SubLink cannot be distributed yet; save it and its context so it can be distributed after lazy sublink processing. */
+ if (checkExprHasSubLink(clause))
+ {
+ remember_qual_info_for_lazy_process_sublink(root, clause, below_outer_join, jointype, security_level,
+ qualscope, ojscope, outerjoin_nonnullable, *postponed_qual_list);
+
+ relids = pull_varnos(root, clause);
+ Assert(bms_is_subset(relids, qualscope));
+ return;
+ }
+
/*
* Retrieve all relids mentioned within the clause.
*/
@@ -2750,3 +2773,178 @@ check_memoizable(RestrictInfo *restrictinfo)
if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr))
restrictinfo->right_hasheqoperator = typentry->eq_opr;
}
+
+/*
+ * query_has_sublink_try_pushdown_qual
+ *	  Report whether SubLink expansion should be deferred for this query so
+ *	  that outer quals can be tried for push-down into the sublink subquery.
+ *
+ * True only when the query has SubLinks, is a plain SELECT using none of the
+ * features tested below, and the condition_push_down GUC is enabled.
+ */
+bool
+query_has_sublink_try_pushdown_qual(PlannerInfo *root)
+{
+	Query	   *query = root->parse;
+	bool		plain_select;
+
+	/* Without SubLinks there is nothing to defer. */
+	if (!query->hasSubLinks)
+		return false;
+
+	/* Only simple SELECTs qualify; every feature listed here disables it. */
+	plain_select = (query->commandType == CMD_SELECT &&
+					!query->hasWindowFuncs &&
+					!query->hasTargetSRFs &&
+					!query->hasRecursive &&
+					!query->hasModifyingCTE &&
+					!query->hasForUpdate &&
+					!query->hasRowSecurity &&
+					query->setOperations == NULL &&
+					query->havingQual == NULL &&
+					query->cteList == NIL);
+
+	/* Finally, the feature must be switched on by the GUC. */
+	return plain_select && condition_push_down;
+}
+
+/*
+ * lazy_process_sublinks
+ *	  Expand the SubLinks whose expansion was deferred earlier in planning.
+ *
+ * Passed to query_planner() as its lazy_process_sublinks_callback.
+ * query_planner() calls it with single_result_rte = true from its RTE_RESULT
+ * branch and with false after generate_base_implied_equalities().
+ *
+ * Does nothing when no unexpanded SubLink is pending; raises an error if any
+ * is still pending once processing is finished.
+ */
+void
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
+{
+ Query *parse = root->parse;
+ List *tlist_vars;
+
+ /* Nothing was deferred (unexpand_sublink_counter is zero). */
+ if (!has_unexpand_sublink(root))
+ return;
+
+ /* Expand SubLinks in the targetlist (isQual = false, lazy_process = true, force_process = true). */
+ root->processed_tlist = (List *)SS_process_sublinks(root, (Node *)root->processed_tlist, false, true, true);
+
+ /* Expand SubLinks in the top-level WHERE clause, one qual at a time. */
+ if (parse->jointree && parse->jointree->quals)
+ {
+ FromExpr *f = parse->jointree;
+ List *newquals = NIL;
+ ListCell *l;
+
+ /* The quals are expected to be in implicit-AND list form here. */
+ Assert(IsA(f->quals, List));
+ foreach(l, (List *) f->quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ /* Only quals containing a SubLink are reprocessed; others are kept as-is. */
+ if (checkExprHasSubLink(qual))
+ {
+ qual = lazy_process_sublink_qual(root, qual);
+ newquals = lappend(newquals, qual);
+ }
+ else
+ newquals = lappend(newquals, qual);
+ }
+
+ /* Quals are replaced one-for-one, so the list length must not change. */
+ Assert(list_length((List *)f->quals) == list_length(newquals));
+ f->quals = (Node *)newquals;
+ }
+
+ /*
+  * Aggregate preprocessing of the targetlist: grouping_planner() skips these
+  * two calls while unexpanded SubLinks are pending, so they are made here.
+  */
+ if(parse->hasAggs)
+ {
+ preprocess_aggrefs(root, (Node *) root->processed_tlist);
+ preprocess_minmax_aggregates(root, true);
+ }
+
+ /* With an empty FROM clause (single RTE_RESULT) there is no need to process the targetlist vars or the equivalence classes. */
+ if (!single_result_rte)
+ {
+ /* Collect the Vars/PlaceHolderVars of the rewritten targetlist and register them with the base rels. */
+ tlist_vars = pull_var_clause((Node *) root->processed_tlist,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ if (tlist_vars != NIL)
+ {
+ add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+ list_free(tlist_vars);
+ }
+
+ /* Run again for equivalence classes touched by the newly distributed quals; classes already marked ec_processed are skipped. */
+ generate_base_implied_equalities(root);
+ }
+
+ /* Every deferred SubLink must have been expanded by now. */
+ if (has_unexpand_sublink(root))
+ elog(ERROR, "sublink is not fully expanded yet");
+
+ return;
+}
+
+/*
+ * Saved arguments of one distribute_qual_to_rels() call whose clause still
+ * contained a SubLink.  Entries are kept in root->unexpand_sublink_expr_list
+ * until lazy_process_sublink_qual() replays the call with the expanded clause.
+ */
+typedef struct sublink_node
+{
+ Node *expr; /* copy of the original clause; lookup key compared with equal() */
+ bool below_outer_join; /* saved distribute_qual_to_rels() argument */
+ JoinType jointype; /* saved distribute_qual_to_rels() argument */
+ Index security_level; /* saved distribute_qual_to_rels() argument */
+ Relids qualscope; /* copy of the saved qualscope set */
+ Relids ojscope; /* copy of the saved ojscope set */
+ Relids outerjoin_nonnullable; /* copy of the saved outerjoin_nonnullable set */
+ List *postponed_qual_list; /* copy of the postponed-qual list at save time */
+} sublink_node;
+
+/*
+ * remember_qual_info_for_lazy_process_sublink
+ *	  Save a qual that still contains a SubLink, together with the arguments
+ *	  distribute_qual_to_rels() received for it, so the call can be replayed
+ *	  after the SubLink has been expanded.
+ *
+ * The clause and the relid sets are copied, so the saved entry does not alias
+ * the caller's data.  The new entry is appended to
+ * root->unexpand_sublink_expr_list.
+ */
+static void
+remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+											Node *clause,
+											bool below_outer_join,
+											JoinType jointype,
+											Index security_level,
+											Relids qualscope,
+											Relids ojscope,
+											Relids outerjoin_nonnullable,
+											List *postponed_qual_list)
+{
+	sublink_node *sublink_info = palloc0(sizeof(sublink_node));
+
+	sublink_info->expr = copyObject(clause);
+	sublink_info->below_outer_join = below_outer_join;
+	sublink_info->jointype = jointype;
+	sublink_info->security_level = security_level;
+	sublink_info->qualscope = bms_copy(qualscope);
+	sublink_info->ojscope = bms_copy(ojscope);
+	sublink_info->outerjoin_nonnullable = bms_copy(outerjoin_nonnullable);
+
+	/*
+	 * Shallow copy only.  list_copy_deep() runs copyObject() on every list
+	 * element, which is valid only for Node-tagged elements.  The entries of
+	 * the postponed-qual list are presumably distribute_qual_to_rels()'s
+	 * private PostponedQual structs, which carry no NodeTag (TODO confirm),
+	 * so a deep copy would misinterpret them whenever the list is non-empty.
+	 */
+	sublink_info->postponed_qual_list = list_copy(postponed_qual_list);
+
+	root->unexpand_sublink_expr_list = lappend(root->unexpand_sublink_expr_list, sublink_info);
+}
+
+/*
+ * lazy_process_sublink_qual
+ *	  Expand the SubLinks of one deferred qual and distribute the result.
+ *
+ * 'node' is a qual that still contains a SubLink.  It is expanded with
+ * SS_process_sublinks().  If the original qual was saved by
+ * remember_qual_info_for_lazy_process_sublink(), the expanded qual is passed
+ * to distribute_qual_to_rels() with the saved arguments and the saved entry
+ * is removed from root->unexpand_sublink_expr_list.
+ *
+ * Returns the expanded qual.
+ */
+Node *
+lazy_process_sublink_qual(PlannerInfo *root, Node *node)
+{
+	Node	   *qual;
+	sublink_node *sublink_info;
+
+	/* Expand now: isQual = true, lazy_process = true, force_process = true. */
+	qual = SS_process_sublinks(root, node, true, true, true);
+
+	/*
+	 * Find the saved context by comparing against the pre-expansion clause.
+	 * NOTE(review): the lookup runs after SS_process_sublinks(), which
+	 * assigns sublink->testexpr in place; confirm 'node' still equal()s the
+	 * saved copy in all cases.
+	 */
+	sublink_info = (sublink_node *) search_sublink_from_lazy_process_list(root, node);
+	if (sublink_info)
+	{
+		List	   *postponed_qual_list = NIL;
+
+		distribute_qual_to_rels(root, qual,
+								sublink_info->below_outer_join,
+								sublink_info->jointype,
+								sublink_info->security_level,
+								sublink_info->qualscope,
+								sublink_info->ojscope,
+								sublink_info->outerjoin_nonnullable,
+								&postponed_qual_list);
+
+		Assert(postponed_qual_list == NIL);
+
+		/*
+		 * Remove by pointer identity.  list_delete() compares members with
+		 * equal(), which requires Node-tagged members; sublink_node has no
+		 * NodeTag, so list_delete_ptr() is the correct primitive here.
+		 */
+		root->unexpand_sublink_expr_list =
+			list_delete_ptr(root->unexpand_sublink_expr_list, sublink_info);
+	}
+
+	return qual;
+}
+
+/*
+ * search_sublink_from_lazy_process_list
+ *	  Look up the saved entry whose clause is equal() to 'node'.
+ *
+ * Returns the first matching sublink_node from
+ * root->unexpand_sublink_expr_list (as void *), or NULL if none matches.
+ */
+static void *
+search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node)
+{
+	ListCell   *cell;
+
+	foreach(cell, root->unexpand_sublink_expr_list)
+	{
+		sublink_node *entry = lfirst(cell);
+
+		Assert(entry->expr);
+
+		/* First structural match wins. */
+		if (equal(entry->expr, node))
+			return (void *) entry;
+	}
+
+	return NULL;
+}
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d16669..03088648919 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -49,7 +49,7 @@
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first);
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -70,7 +70,7 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
-preprocess_minmax_aggregates(PlannerInfo *root)
+preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink)
{
Query *parse = root->parse;
FromExpr *jtnode;
@@ -173,9 +173,9 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, lazy_process_sublink))
continue;
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse, lazy_process_sublink))
continue;
/* No indexable path for this aggregate, so fail */
@@ -315,7 +315,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
*/
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first)
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink)
{
PlannerInfo *subroot;
Query *parse;
@@ -352,12 +352,23 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
/* append_rel_list might contain outer Vars? */
subroot->append_rel_list = copyObject(root->append_rel_list);
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+
+ if (lazy_process_sublink)
+ {
+ /* With lazy sublink processing the parent root may already carry join, equivalence-class and placeholder data that the subroot does not need, so reset those lists. */
+ subroot->join_info_list = NIL;
+ subroot->eq_classes = NIL;
+ subroot->placeholder_list = NIL;
+ }
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
/* and we haven't made equivalence classes, either */
Assert(subroot->eq_classes == NIL);
/* and we haven't created PlaceHolderInfos, either */
Assert(subroot->placeholder_list == NIL);
+ }
/*----------
* Generate modified query of the form
@@ -418,7 +429,7 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
subroot->tuple_fraction = 1.0;
subroot->limit_tuples = 1.0;
- final_rel = query_planner(subroot, minmax_qp_callback, NULL);
+ final_rel = query_planner(subroot, minmax_qp_callback, NULL, NULL);
/*
* Since we didn't go through subquery_planner() to handle the subquery,
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 273ac0acf7e..bd526f8785d 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -53,7 +53,8 @@
*/
RelOptInfo *
query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra)
+ query_pathkeys_callback qp_callback, void *qp_extra,
+ lazy_process_sublinks_callback lps_callback)
{
Query *parse = root->parse;
List *joinlist;
@@ -78,6 +79,9 @@ query_planner(PlannerInfo *root,
root->fkey_list = NIL;
root->initial_rels = NIL;
+ if (has_unexpand_sublink(root) && lps_callback == NULL)
+ elog(ERROR, "lazy_process_sublinks_callback must be set for expanded sublink");
+
/*
* Set up arrays for accessing base relations and AppendRelInfos.
*/
@@ -102,6 +106,9 @@ query_planner(PlannerInfo *root,
Assert(rte != NULL);
if (rte->rtekind == RTE_RESULT)
{
+ if (lps_callback)
+ (*lps_callback)(root, true);
+
/* Make the RelOptInfo for it directly */
final_rel = build_simple_rel(root, varno, NULL);
@@ -197,6 +204,9 @@ query_planner(PlannerInfo *root,
*/
generate_base_implied_equalities(root);
+ if (lps_callback)
+ (*lps_callback)(root, false);
+
/*
* We have completed merging equivalence sets, so it's now possible to
* generate pathkeys in canonical form; so compute query_pathkeys and
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd01ec0526f..21795deb012 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -64,6 +64,7 @@
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
+#include "utils/guc.h"
/* GUC parameters */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
@@ -128,8 +129,8 @@ typedef struct
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
-static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static Node *preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -641,6 +642,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
+ root->unexpand_sublink_counter = 0;
+ root->unexpand_sublink_expr_list = NIL;
/*
* If there is a WITH list, process each WITH query and either convert it
@@ -784,8 +787,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* part of the targetlist.
*/
parse->targetList = (List *)
- preprocess_expression(root, (Node *) parse->targetList,
- EXPRKIND_TARGET);
+ preprocess_expression_ext(root, (Node *) parse->targetList,
+ EXPRKIND_TARGET, false);
/* Constant-folding might have removed all set-returning functions */
if (parse->hasTargetSRFs)
@@ -807,7 +810,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
- preprocess_qual_conditions(root, (Node *) parse->jointree);
+ preprocess_qual_conditions(root, (Node *) parse->jointree, true);
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
@@ -1049,6 +1052,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
return root;
}
+/*
+ * preprocess_expression
+ *   Wrapper around preprocess_expression_ext() that always passes
+ *   process_sublink = true, i.e. SubLinks in the expression are expanded
+ *   immediately, never deferred.
+ */
+static Node *
+preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+{
+ return preprocess_expression_ext(root, expr, kind, true);
+}
+
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
@@ -1056,7 +1065,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* conditions), a HAVING clause, or a few other things.
*/
static Node *
-preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
@@ -1129,7 +1138,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
- expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
+ expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL), false, process_sublink);
/*
* XXX do not insert anything here unless you have grokked the comments in
@@ -1157,8 +1166,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* Recursively scan the query's jointree and do subquery_planner's
* preprocessing work on each qual condition found therein.
*/
-static void
-preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
{
if (jtnode == NULL)
return;
@@ -1172,17 +1181,19 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
ListCell *l;
foreach(l, f->fromlist)
- preprocess_qual_conditions(root, lfirst(l));
+ preprocess_qual_conditions(root, lfirst(l), false);
- f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- preprocess_qual_conditions(root, j->larg);
- preprocess_qual_conditions(root, j->rarg);
-
+ preprocess_qual_conditions(root, j->larg, false);
+ preprocess_qual_conditions(root, j->rarg, false);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
}
else
@@ -1384,11 +1395,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
- if (parse->hasAggs)
- {
+ if (parse->hasAggs && !has_unexpand_sublink(root))
preprocess_aggrefs(root, (Node *) root->processed_tlist);
+
+ if (parse->hasAggs)
preprocess_aggrefs(root, (Node *) parse->havingQual);
- }
/*
* Locate any window functions in the tlist. (We don't need to look
@@ -1412,8 +1423,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* that is needed in MIN/MAX-optimizable cases will have to be
* duplicated in planagg.c.
*/
- if (parse->hasAggs)
- preprocess_minmax_aggregates(root);
+ if (parse->hasAggs && !has_unexpand_sublink(root))
+ preprocess_minmax_aggregates(root, false);
/*
* Figure out whether there's a hard limit on the number of rows that
@@ -1445,7 +1456,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* We also generate (in standard_qp_callback) pathkey representations
* of the query's sort clause, distinct clause, etc.
*/
- current_rel = query_planner(root, standard_qp_callback, &qp_extra);
+ current_rel = query_planner(root, standard_qp_callback, &qp_extra, lazy_process_sublinks);
/*
* Convert the query's result tlist into PathTarget format.
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d102..fbf62d50ae6 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,11 +32,13 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/paths.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#include "utils/ruleutils.h"
typedef struct convert_testexpr_context
@@ -49,6 +51,8 @@ typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
+ bool lazy_process;
+ bool force_process;
} process_sublinks_context;
typedef struct finalize_primnode_context
@@ -65,6 +69,13 @@ typedef struct inline_cte_walker_context
Query *ctequery; /* query to substitute */
} inline_cte_walker_context;
+/*
+ * Result of analyzing one "=" expression with equal_expr_analyze_walker().
+ */
+typedef struct equal_expr_info_context
+{
+ bool has_unexpected_expr; /* saw a Param, a FuncExpr, or a second Var at the same level class */
+ bool has_const; /* saw at least one Const */
+ Var *outer_var; /* copy of the first Var with varlevelsup > 0, or NULL */
+ Var *inner_var; /* copy of the first Var with varlevelsup == 0, or NULL */
+} equal_expr_info_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
@@ -105,6 +116,11 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
+static Node *replace_vars_mutator(Node *node, void *context);
+static List *find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery);
+static bool equal_expr_analyze_walker(Node *node, void *context);
+static bool equal_expr_safety_check(Node *node, equal_expr_info_context *context);
+
/*
* Get the datatype/typmod/collation of the first column of the plan's output.
@@ -162,7 +178,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool isTopQual)
+ Node *testexpr, bool isTopQual, bool lazy_process)
{
Query *subquery;
bool simple_exists = false;
@@ -173,6 +189,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Plan *plan;
List *plan_params;
Node *result;
+ Query *optimized_subquery = NULL;
+ Query *optimized_subquery_copy = NULL;
/*
* Copy the source Query node. This is a quick and dirty kluge to resolve
@@ -218,8 +236,32 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ if (lazy_process)
+ {
+ List *conditions = NIL;
+ Query *subquery_copy = copyObject(orig_subquery);
+
+ /*
+ * Search sublink query.
+ * If the query contains an outer condition equivalent expression,
+ * this means that there may be external conditions that can be pushed down to optimize the subquery.
+ */
+ conditions = find_equal_conditions_contain_uplevelvar_in_sublink_query(subquery_copy);
+ if (conditions)
+ {
+ /* Search outer queries, and if relevant equivalent expressions are found, push them down into subqueries. */
+ if (try_push_outer_qual_to_sublink_query(root, subquery_copy, conditions))
+ {
+ optimized_subquery = subquery_copy;
+ optimized_subquery_copy = copyObject(optimized_subquery);
+ }
+ list_free(conditions);
+ }
+ }
+
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery,
+ subroot = subquery_planner(root->glob,
+ (optimized_subquery != NULL) ? optimized_subquery : subquery,
root,
false, tuple_fraction);
@@ -256,7 +298,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
List *paramIds;
/* Make a second copy of the original subquery */
- subquery = copyObject(orig_subquery);
+ subquery = copyObject((optimized_subquery_copy != NULL) ? optimized_subquery_copy : orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
@@ -365,7 +407,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
*/
if (IsA(arg, PlaceHolderVar) ||
IsA(arg, Aggref))
- arg = SS_process_sublinks(root, arg, false);
+ arg = SS_process_sublinks(root, arg, false, false, true);
splan->parParam = lappend_int(splan->parParam, pitem->paramId);
splan->args = lappend(splan->args, arg);
@@ -1915,12 +1957,14 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
-SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
+SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
+ context.lazy_process = lazy_process;
+ context.force_process = force_process;
return process_sublinks_mutator(expr, &context);
}
@@ -1930,20 +1974,34 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
process_sublinks_context locContext;
locContext.root = context->root;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
- Node *testexpr;
/*
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
- testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
+ sublink->testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+
+ if (!context->force_process &&
+ query_has_sublink_try_pushdown_qual(context->root))
+ {
+ Assert(context->lazy_process == false);
+ context->root->unexpand_sublink_counter++;
+ return node;
+ }
+
+ if (context->lazy_process)
+ context->root->unexpand_sublink_counter--;
/*
* Now build the SubPlan node and make the expr to return.
@@ -1952,8 +2010,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
(Query *) sublink->subselect,
sublink->subLinkType,
sublink->subLinkId,
- testexpr,
- context->isTopQual);
+ sublink->testexpr,
+ context->isTopQual, locContext.lazy_process);
}
/*
@@ -1978,8 +2036,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
- Assert(!IsA(node, SubPlan));
- Assert(!IsA(node, AlternativeSubPlan));
+ Assert(!IsA(node, SubPlan) || context->lazy_process);
+ Assert(!IsA(node, AlternativeSubPlan) || context->lazy_process);
Assert(!IsA(node, Query));
/*
@@ -2003,6 +2061,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2024,6 +2084,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2989,3 +3051,186 @@ SS_make_initplan_from_plan(PlannerInfo *root,
/* Set costs of SubPlan using info from the plan tree */
cost_subplan(subroot, node, plan);
}
+
+/*
+ * sublink_query_push_qual
+ *	  AND a rewritten copy of 'qual' onto the WHERE clause of 'subquery'.
+ *
+ * Every Var in 'qual' that is equal() to 'outer' is replaced by a copy of
+ * 'inner' before the qual is attached to subquery->jointree->quals.
+ */
+void
+sublink_query_push_qual(Query *subquery, Node *qual, Var *outer, Var *inner)
+{
+	pushdown_expr_info subst;
+	Node	   *rewritten;
+
+	/* Describe the substitution for the mutator. */
+	subst.inner = inner;
+	subst.outer = outer;
+
+	rewritten = expression_tree_mutator(qual, replace_vars_mutator, (void *) &subst);
+
+	subquery->jointree->quals = make_and_qual(subquery->jointree->quals, rewritten);
+}
+
+/*
+ * replace_vars_mutator
+ *	  expression_tree_mutator callback: replace each Var that is equal() to
+ *	  info->outer with a fresh copy of info->inner.
+ *
+ * 'context' points to a pushdown_expr_info.  All other nodes are copied
+ * recursively by expression_tree_mutator().
+ */
+static Node *
+replace_vars_mutator(Node *node, void *context)
+{
+	pushdown_expr_info *info = (pushdown_expr_info *) context;
+
+	/*
+	 * expression_tree_mutator() hands optional child fields to the mutator
+	 * as-is, so NULL must be handled before IsA() dereferences the node.
+	 */
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, Var) && equal(node, (Node *) info->outer))
+		return copyObject((Node *) info->inner);
+
+	return expression_tree_mutator(node, replace_vars_mutator, context);
+}
+
+/*
+ * condition_is_safe_pushdown_to_sublink
+ *	  Test whether a restriction clause has the form "var = const" on exactly
+ *	  the given Var, so that it may be pushed into a sublink subquery.
+ *
+ * Returns false for pseudoconstant clauses, clauses for which
+ * contain_leaked_vars() is true, clauses rejected by
+ * equal_expr_safety_check(), and clauses whose single Var is a system column
+ * or is not equal() to 'var'.
+ */
+bool
+condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var)
+{
+	Node	   *clause = (Node *) rinfo->clause;
+	equal_expr_info_context info;
+
+	if (clause == NULL || rinfo->pseudoconstant || contain_leaked_vars(clause))
+		return false;
+
+	memset(&info, 0, sizeof(equal_expr_info_context));
+	if (!equal_expr_safety_check(clause, &info))
+		return false;
+
+	/* Must look like "inner var = const": one local Var, a Const, nothing else. */
+	if (info.inner_var == NULL ||
+		info.outer_var != NULL ||
+		info.has_unexpected_expr ||
+		!info.has_const)
+		return false;
+
+	/* Same relation and column as 'var', and not a system column. */
+	return info.inner_var->varattno > 0 && equal(info.inner_var, var);
+}
+
+/*
+ * find_equal_conditions_contain_uplevelvar_in_sublink_query
+ *	  Collect the top-level WHERE conjuncts of a sublink subquery that have
+ *	  the form "outer var = inner var".
+ *
+ * Works on a canonicalized copy of the quals, so the subquery itself is not
+ * modified.  Returns a list of palloc'd pushdown_expr_info entries (outer and
+ * inner Var per match), or NIL when there are no quals or no matches.
+ */
+static List *
+find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery)
+{
+	FromExpr   *jointree = orig_subquery->jointree;
+	List	   *result = NIL;
+	Node	   *clauses;
+	ListCell   *cell;
+
+	if (jointree == NULL || jointree->quals == NULL)
+		return NIL;
+
+	/* Canonicalize a private copy and flatten it to an implicit-AND list. */
+	clauses = copyObject(jointree->quals);
+	clauses = (Node *) canonicalize_qual((Expr *) clauses, false);
+	clauses = (Node *) make_ands_implicit((Expr *) clauses);
+	Assert(IsA(clauses, List));
+
+	foreach(cell, (List *) clauses)
+	{
+		Node	   *clause = (Node *) lfirst(cell);
+		equal_expr_info_context info;
+		pushdown_expr_info *pair;
+
+		memset(&info, 0, sizeof(equal_expr_info_context));
+		if (!equal_expr_safety_check(clause, &info))
+			continue;
+
+		/* Keep only "outer var = inner var": both Vars, no Const, nothing else. */
+		if (info.inner_var == NULL ||
+			info.outer_var == NULL ||
+			info.has_unexpected_expr ||
+			info.has_const)
+			continue;
+
+		pair = palloc0(sizeof(pushdown_expr_info));
+		pair->inner = info.inner_var;
+		pair->outer = info.outer_var;
+		result = lappend(result, pair);
+	}
+
+	return result;
+}
+
+/*
+ * equal_expr_safety_check
+ *	  Check that 'node' is a simple binary "=" OpExpr free of volatile,
+ *	  mutable and non-strict functions, and if so fill '*context' with what
+ *	  the expression contains.
+ *
+ * Returns false (leaving '*context' untouched) when the node is rejected.
+ * Returns true after analysis; callers must still inspect '*context', since
+ * the walker's own result is not reflected in the return value.
+ *
+ * NOTE(review): the operator is recognized by its name only; confirm that a
+ * name match on "=" is sufficient to assume equality semantics.
+ */
+static bool
+equal_expr_safety_check(Node *node, equal_expr_info_context *context)
+{
+	const char *opname;
+
+	if (!IsA(node, OpExpr))
+		return false;
+
+	opname = get_simple_binary_op_name((OpExpr *) node);
+	if (opname == NULL || strcmp(opname, "=") != 0)
+		return false;
+
+	if (contain_volatile_functions(node))
+		return false;
+	if (contain_mutable_functions(node))
+		return false;
+	if (contain_nonstrict_functions(node))
+		return false;
+
+	(void) equal_expr_analyze_walker(node, context);
+
+	return true;
+}
+
+/*
+ * equal_expr_analyze_walker
+ *	  expression_tree_walker callback that records, in the
+ *	  equal_expr_info_context passed as 'context', which Vars and Consts an
+ *	  expression contains.
+ *
+ * The first Var with varlevelsup > 0 is copied into outer_var and the first
+ * with varlevelsup == 0 into inner_var; a second Var of either class, a
+ * Param or a FuncExpr sets has_unexpected_expr.  Returns true (stopping the
+ * walk) once something unexpected has been found.
+ */
+static bool
+equal_expr_analyze_walker(Node *node, void *context)
+{
+	equal_expr_info_context *info = (equal_expr_info_context *) context;
+
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		/* Pick the slot matching the Var's query level. */
+		Var		  **slot = (((Var *) node)->varlevelsup > 0) ?
+			&info->outer_var : &info->inner_var;
+
+		if (*slot != NULL)
+			info->has_unexpected_expr = true;	/* more than one such Var */
+		else
+			*slot = (Var *) copyObject(node);
+
+		return info->has_unexpected_expr;
+	}
+
+	if (IsA(node, Const))
+	{
+		info->has_const = true;
+		return false;
+	}
+
+	if (IsA(node, Param) || IsA(node, FuncExpr))
+	{
+		info->has_unexpected_expr = true;
+		return true;
+	}
+
+	return expression_tree_walker(node, equal_expr_analyze_walker, context);
+}
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 6b4022c3bcc..ed6bae79dd8 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -429,7 +429,6 @@ static void resolve_special_varno(Node *node, deparse_context *context,
static Node *find_param_referent(Param *param, deparse_context *context,
deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
static void get_parameter(Param *param, deparse_context *context);
-static const char *get_simple_binary_op_name(OpExpr *expr);
static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
static void appendContextKeyword(deparse_context *context, const char *str,
int indentBefore, int indentAfter, int indentPlus);
@@ -7973,7 +7972,7 @@ get_parameter(Param *param, deparse_context *context)
* helper function for isSimpleNode
* will return single char binary operator name, or NULL if it's not
*/
-static const char *
+const char *
get_simple_binary_op_name(OpExpr *expr)
{
List *args = expr->args;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee6a838b3af..b90fdef3d1d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -684,6 +684,8 @@ static char *recovery_target_lsn_string;
/* should be static, but commands/variable.c needs to get at this */
char *role_string;
+bool condition_push_down = true;
+
/*
* Displayable names for context types (enum GucContext)
@@ -973,6 +975,17 @@ static const unit_conversion time_unit_conversion_table[] =
static struct config_bool ConfigureNamesBool[] =
{
+	/* Planner switch for pushing outer quals into sublink subqueries. */
+	{
+		{"condition_push_down", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner to push outer-query restriction clauses down into sublink subqueries."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&condition_push_down,
+		true,
+		NULL, NULL, NULL
+	},
+
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 324d92880b5..8389e333579 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -377,6 +377,9 @@ struct PlannerInfo
/* Does this query modify any partition key columns? */
bool partColsUpdated;
+
+ int unexpand_sublink_counter;
+ List *unexpand_sublink_expr_list;
};
@@ -995,6 +998,7 @@ typedef struct EquivalenceClass
bool ec_has_volatile; /* the (sole) member is a volatile expr */
bool ec_below_outer_join; /* equivalence applies below an OJ */
bool ec_broken; /* failed to generate needed clauses? */
+ bool ec_processed;
Index ec_sortref; /* originating sortclause label, or 0 */
Index ec_min_security; /* minimum security_level in ec_sources */
Index ec_max_security; /* maximum security_level in ec_sources */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c2..425b5c68131 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -25,6 +25,12 @@ extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+/*
+ * One candidate pairing used when trying to push an upper-level qual into a
+ * sublink's subquery; a list of these is the 'conditions' argument of
+ * try_push_outer_qual_to_sublink_query().
+ */
+typedef struct pushdown_expr_info
+{
+ Var *outer; /* Var of an upper query level; its varlevelsup/varno select the rel whose quals are examined */
+ Var *inner; /* Var handed to sublink_query_push_qual() as its 'replace' argument */
+} pushdown_expr_info;
+
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -62,7 +68,7 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
extern void generate_partitionwise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
-
+extern bool try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index bf1adfc52ac..b5f1db3a3b8 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -23,17 +23,20 @@ extern double cursor_tuple_fraction;
/* query_planner callback to compute query_pathkeys */
typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
+typedef void (*lazy_process_sublinks_callback) (PlannerInfo *root, bool single_result_rte);
/*
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra);
+ query_pathkeys_callback qp_callback,
+ void *qp_extra,
+ lazy_process_sublinks_callback lps_callback);
/*
* prototypes for plan/planagg.c
*/
-extern void preprocess_minmax_aggregates(PlannerInfo *root);
+extern void preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink);
/*
* prototypes for plan/createplan.c
@@ -67,6 +70,8 @@ extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
extern int from_collapse_limit;
extern int join_collapse_limit;
+#define has_unexpand_sublink(root) ((root)->unexpand_sublink_counter != 0)
+
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
@@ -96,6 +101,9 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Relids nullable_relids,
Index security_level);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
+extern void lazy_process_sublinks(PlannerInfo *root, bool single_result_rte);
+extern bool query_has_sublink_try_pushdown_qual(PlannerInfo *root);
+extern Node *lazy_process_sublink_qual(PlannerInfo *root, Node *node);
/*
* prototypes for plan/analyzejoins.c
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 9a15de50259..14ff94f60e3 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -57,5 +57,6 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
double tuple_fraction);
extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
+extern void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop);
#endif /* PLANNER_H */
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 059bdf941ef..396c4c6117e 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -25,7 +25,7 @@ extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
bool under_not,
Relids available_rels);
extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
-extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
+extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process);
extern void SS_identify_outer_params(PlannerInfo *root);
extern void SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel);
extern void SS_attach_initplans(PlannerInfo *root, Plan *plan);
@@ -36,5 +36,7 @@ extern Param *SS_make_initplan_output_param(PlannerInfo *root,
extern void SS_make_initplan_from_plan(PlannerInfo *root,
PlannerInfo *subroot, Plan *plan,
Param *prm);
+extern bool condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var);
+extern void sublink_query_push_qual(Query *subquery, Node *qual, Var *var, Var *replace);
#endif /* SUBSELECT_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index aa18d304ac0..1631e9603e9 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -288,6 +288,8 @@ extern int tcp_user_timeout;
extern bool trace_sort;
#endif
+extern bool condition_push_down;
+
/*
* Functions exported by guc.c
*/
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index d333e5e8a56..d4ccca3fe3c 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -42,5 +42,6 @@ extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
extern char *pg_get_statisticsobjdef_string(Oid statextid);
+extern const char *get_simple_binary_op_name(OpExpr *expr);
#endif /* RULEUTILS_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 3a91c144a27..232ee6d15a1 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -926,9 +926,9 @@ WHERE
-> Result
Output: (hjtest_1.b * 5)
-> Hash
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
@@ -974,7 +974,7 @@ WHERE
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 4e8ddc70613..2df4d6e15b5 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1063,7 +1063,7 @@ where o.ten = 0;
SubPlan 1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
- Filter: (int4_tbl.f1 <= $0)
+ Filter: (int4_tbl.f1 <= $1)
(14 rows)
select sum(ss.tst::int) from
--
2.30.1 (Apple Git-130)
Hi,
On 12/7/21 10:44, 曾文旌(义从) wrote:
Hi Hackers
For my previous proposal, I developed a prototype and passed
regression testing. It works similarly to subquery's qual pushdown.
We know that a sublink is expanded at the beginning of each level of
query. At this stage, the query's conditions and equivalence classes
are not yet processed. They are processed only after
generate_base_implied_equalities, which is why a qual can be pushed
down to a subquery but not to a sublink.

My POC implementation chose to delay the sublink expansion in the
SELECT clause (targetList) and WHERE clause. Specifically, it is
delayed until after generate_base_implied_equalities. Thus, the
equivalent conditions already established in the upper-level query can
easily be obtained during sublink expansion (make_subplan).

For example, if the upper-level query has a.id = 10 and the sublink
query has a.id = b.id, then we get b.id = 10 and push it down to the
sublink query. If b is a partitioned table and is partitioned by id,
then a large number of unrelated subpartitions are pruned out. This
saves a significant amount of planner and SQL execution time,
especially if the partitioned table has a large number of
subpartitions, and that is what I want.

Currently, there are two SQL failures in the regression tests,
because the expansion order of sublinks was changed; this does not
affect the execution result of the SQL.

I look forward to your suggestions on this proposal.
I took a quick look, and while I don't see / can't think of any problems
with delaying it until after generating implied equalities, there seems
to be a number of gaps.
1) Are there any regression tests exercising this modified behavior?
Maybe there are, but if the only changes are due to change in order of
targetlist entries, that doesn't seem like a clear proof.
It'd be good to add a couple tests exercising both the positive and
negative case (i.e. when we can and can't pushdown a qual).
2) apparently, contrib/postgres_fdw does crash like this:
#3 0x000000000077b412 in adjust_appendrel_attrs_mutator
(node=0x13f7ea0, context=0x7fffc3351b30) at appendinfo.c:470
470 Assert(!IsA(node, SubLink));
(gdb) p node
$1 = (Node *) 0x13f7ea0
(gdb) p *node
$2 = {type = T_SubLink}
Backtrace attached.
3) various parts of the patch really need at least some comments, like:
- try_push_outer_qual_to_sublink_query really needs some docs
- new stuff at the end of initsplan.c
4) generate_base_implied_equalities
shouldn't this
if (ec->ec_processed)
;
really be?
if (ec->ec_processed)
continue;
5) I'm not sure why we need the new ec_processed flag.
6) So we now have lazy_process_sublink callback? Does that mean we
expand sublinks in two places - sometimes lazily, sometimes not?
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Attachments:
------------------原始邮件 ------------------
发件人:Tomas Vondra <tomas.vondra@enterprisedb.com>
发送时间:Wed Dec 8 11:26:35 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>
抄送:wjzeng <wjzeng2012@gmail.com>
主题:Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
Hi,
On 12/7/21 10:44, 曾文旌(义从) wrote:
Hi Hackers
For my previous proposal, I developed a prototype and passed
regression testing. It works similarly to subquery's qual pushdown.
We know that a sublink is expanded at the beginning of each level of
query. At this stage, the query's conditions and equivalence classes
are not yet processed. They are processed only after
generate_base_implied_equalities, which is why a qual can be pushed
down to a subquery but not to a sublink.

My POC implementation chose to delay the sublink expansion in the
SELECT clause (targetList) and WHERE clause. Specifically, it is
delayed until after generate_base_implied_equalities. Thus, the
equivalent conditions already established in the upper-level query can
easily be obtained during sublink expansion (make_subplan).

For example, if the upper-level query has a.id = 10 and the sublink
query has a.id = b.id, then we get b.id = 10 and push it down to the
sublink query. If b is a partitioned table and is partitioned by id,
then a large number of unrelated subpartitions are pruned out. This
saves a significant amount of planner and SQL execution time,
especially if the partitioned table has a large number of
subpartitions, and that is what I want.

Currently, there are two SQL failures in the regression tests,
because the expansion order of sublinks was changed; this does not
affect the execution result of the SQL.

I look forward to your suggestions on this proposal.
I took a quick look, and while I don't see / can't think of any problems
with delaying it until after generating implied equalities, there seems
to be a number of gaps.
Thank you for your attention.
1) Are there any regression tests exercising this modified behavior?
Maybe there are, but if the only changes are due to change in order of
targetlist entries, that doesn't seem like a clear proof.
It'd be good to add a couple tests exercising both the positive and
negative case (i.e. when we can and can't pushdown a qual).
I added several test cases to the regression suite (qual_pushdown_to_sublink.sql).
They use partitioned tables so that the plans show when a qual has been pushed down into a sublink.
Hopefully this will help you understand the details of this patch. I will add more cases later.
2) apparently, contrib/postgres_fdw does crash like this:
#3 0x000000000077b412 in adjust_appendrel_attrs_mutator
(node=0x13f7ea0, context=0x7fffc3351b30) at appendinfo.c:470
470 Assert(!IsA(node, SubLink));
(gdb) p node
$1 = (Node *) 0x13f7ea0
(gdb) p *node
$2 = {type = T_SubLink}
Backtrace attached.
The patch attached to my last email passed all the tests under src/test/regress.
As you pointed out, there was a regression failure under contrib (in contrib/postgres_fdw).
I have fixed it, and the current patch (V2) passes check-world.
3) various parts of the patch really need at least some comments, like:
- try_push_outer_qual_to_sublink_query really needs some docs
- new stuff at the end of initsplan.c
Ok, I added some comments and will add more. If you have questions about any details,
please point them out directly.
4) generate_base_implied_equalities
shouldn't this
if (ec->ec_processed)
;
really be?
if (ec->ec_processed)
continue;
You are right. I fixed it.
5) I'm not sure why we need the new ec_processed flag.
I did this to avoid generating duplicate equalities, because generate_base_implied_equalities is now called twice:
1) I need the base implied equalities produced by the first generate_base_implied_equalities call;
they are used to push quals down into sublinks (lazy_process_sublinks).
2) Expanding a sublink may produce a new equivalence expression containing a parameter, such as a = $1,
so the equivalence classes have to be processed again.
3) So I added ec_processed. It is reset whenever an equivalence class changes (add_eq_member/process_equivalence),
and the second generate_base_implied_equalities call skips the classes that are still marked as processed.
If you have a better suggestion, please let me know.
6) So we now have lazy_process_sublink callback? Does that mean we
expand sublinks in two places - sometimes lazily, sometimes not?
Yes, not every sublink is delayed. Let me explain:
1) I added a GUC switch, enable_lazy_process_sublink. If it is turned off, no sublink is processed lazily.
Qual pushdown to sublinks depends on lazy sublink processing, so in that case no quals are pushed down.
2) Even if enable_lazy_process_sublink = true, when the query at this level uses certain complex features,
the sublinks at this level will not attempt qual pushdown (see function query_has_sublink_try_pushdown_qual).
I want to support a minimal subset first, and then consider complex features such as CTE/DML.
3) Finally, when conditions 1 and 2 allow it, every kind of sublink in the SELECT clause or
WHERE clause has its expansion delayed and attempts qual pushdown. Sublinks elsewhere in the SQL statement
are not delayed.
The current status meets my requirements for now. Of course, once this scheme is proven to be feasible, maybe
we can discuss delaying the processing of all sublinks, just like qual pushdown to subqueries.
thanks
Wenjing
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Attachments:
0001-poc-pushdown-qual-to-sublink-v2.patchapplication/octet-streamDownload
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 296dd75c1b6..c9719c90183 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -40,8 +40,10 @@
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
+#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
@@ -3895,6 +3897,68 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
list_free(live_children);
}
+/*
+ * try_push_outer_qual_to_sublink_query
+ *   Try to copy restriction clauses of upper query levels into a sublink's
+ *   subquery.
+ *
+ * 'parent' is the PlannerInfo used as the starting point when walking up the
+ * parent_root links; 'subquery' is the sublink's Query that receives the
+ * pushed-down quals; 'conditions' is a list of pushdown_expr_info, each
+ * pairing an upper-level Var (outer) with a Var local to the subquery (inner).
+ *
+ * Returns true if at least one clause was handed to
+ * sublink_query_push_qual(); false if 'conditions' is empty, if
+ * subquery_is_pushdown_safe() rejects the subquery, or if no clause qualified.
+ *
+ * Side effect: outer->varlevelsup of every entry in 'conditions' is
+ * overwritten with 0.
+ */
+bool
+try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions)
+{
+ pushdown_safety_info safetyInfo;
+ ListCell *lc1;
+ bool found = false;
+ bool query_is_pushdown_safe = false;
+
+ if (conditions == NIL)
+ return false;
+
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeColumns = (bool *)
+ palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+
+ /* Check whether pushing a qual down into the sublink query is safe at all. */
+ query_is_pushdown_safe = subquery_is_pushdown_safe(subquery, subquery, &safetyInfo);
+ /* Only the boolean verdict is used; the per-column flags are discarded. */
+ pfree(safetyInfo.unsafeColumns);
+ if (!query_is_pushdown_safe)
+ return false;
+
+ /*
+ * Each entry of 'conditions' stands for a clause of the sublink of the form (outer var = local var).
+ * For each one, look in the upper query for restriction clauses already attached to the outer var's rel (outer var = const).
+ * From (outer var = local var) and (outer var = const) we derive (local var = const) and push it down into the sublink.
+ */
+ foreach(lc1, conditions)
+ {
+ pushdown_expr_info *expr_info = (pushdown_expr_info *) lfirst(lc1);
+ Index levelsup = 0;
+ RelOptInfo *rel;
+ ListCell *lc2;
+ PlannerInfo *tmproot = parent;
+
+ /*
+ * The outer var may belong to any upper query level; follow parent_root
+ * (varlevelsup - 1) times from 'parent' to reach that level's PlannerInfo.
+ * NOTE(review): levelsup is an Index; if it is unsigned, a Var with
+ * varlevelsup == 0 would wrap here -- confirm callers never pass one.
+ */
+ for (levelsup = expr_info->outer->varlevelsup - 1; levelsup > 0; levelsup--)
+ tmproot = tmproot->parent_root;
+
+ /* Flatten varlevelsup in place so the Var is expressed at the level of tmproot's baserestrictinfo clauses. */
+ expr_info->outer->varlevelsup = 0;
+
+ /* Look up the outer var's base relation in that root; skip it if it has no restriction clauses. */
+ rel = find_base_rel(tmproot, expr_info->outer->varno);
+ if (rel == NULL || rel->baserestrictinfo == NULL)
+ continue;
+
+ foreach(lc2, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+ /* Only clauses that condition_is_safe_pushdown_to_sublink() accepts for this var are pushed (intended: var = const). */
+ if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+ {
+ /* Push a copy of the clause, passing the outer var and its inner replacement, so the original RestrictInfo is untouched. */
+ sublink_query_push_qual(subquery, (Node *)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
+ found = true;
+ }
+ }
+ }
+
+ return found;
+}
/*****************************************************************************
* DEBUG SUPPORT
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d6..f4aeb716a59 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -388,6 +388,7 @@ process_equivalence(PlannerInfo *root,
restrictinfo->security_level);
ec1->ec_max_security = Max(ec1->ec_max_security,
restrictinfo->security_level);
+ ec1->ec_processed = false;
/* mark the RI as associated with this eclass */
restrictinfo->left_ec = ec1;
restrictinfo->right_ec = ec1;
@@ -450,6 +451,7 @@ process_equivalence(PlannerInfo *root,
ec->ec_min_security = restrictinfo->security_level;
ec->ec_max_security = restrictinfo->security_level;
ec->ec_merged = NULL;
+ ec->ec_processed = false;
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
@@ -574,6 +576,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
ec->ec_relids = bms_add_members(ec->ec_relids, relids);
}
ec->ec_members = lappend(ec->ec_members, em);
+ ec->ec_processed = false;
return em;
}
@@ -711,6 +714,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_min_security = UINT_MAX;
newec->ec_max_security = 0;
newec->ec_merged = NULL;
+ newec->ec_processed = false;
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
@@ -1114,7 +1118,12 @@ generate_base_implied_equalities(PlannerInfo *root)
* Single-member ECs won't generate any deductions, either here or at
* the join level.
*/
- if (list_length(ec->ec_members) > 1)
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
+ else if (list_length(ec->ec_members) > 1)
{
if (ec->ec_has_const)
generate_base_implied_equalities_const(root, ec);
@@ -1151,6 +1160,7 @@ generate_base_implied_equalities(PlannerInfo *root)
rel->has_eclass_joins = true;
}
+ ec->ec_processed = true;
ec_index++;
}
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900f..ba74a61aeef 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -30,10 +30,12 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
#include "parser/analyze.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
+#include "utils/guc.h"
/* These parameters are set by GUC */
int from_collapse_limit;
@@ -80,6 +82,16 @@ static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
+static void remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list);
+static void *search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node);
/*****************************************************************************
*
@@ -262,7 +274,16 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
- PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
+ PlaceHolderInfo *phinfo = NULL;
+
+ /*
+ * Since there may be an unexpanded sublink in the targetList,
+ * we'll skip it for now. Don't worry let lazy_process_sublinks do it later.
+ */
+ if (has_unexpand_sublink(root) && checkExprHasSubLink(node))
+ continue;
+
+ phinfo = find_placeholder_info(root, phv,
create_new_ph);
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
@@ -1621,6 +1642,17 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids nullable_relids;
RestrictInfo *restrictinfo;
+ /* Before lazy transform sublink has not been converted, so backup it */
+ if (checkExprHasSubLink(clause))
+ {
+ remember_qual_info_for_lazy_process_sublink(root, clause, below_outer_join, jointype, security_level,
+ qualscope, ojscope, outerjoin_nonnullable, *postponed_qual_list);
+
+ relids = pull_varnos(root, clause);
+ Assert(bms_is_subset(relids, qualscope));
+ return;
+ }
+
/*
* Retrieve all relids mentioned within the clause.
*/
@@ -2750,3 +2782,189 @@ check_memoizable(RestrictInfo *restrictinfo)
if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr))
restrictinfo->right_hasheqoperator = typentry->eq_opr;
}
+
+/*
+ * query_has_sublink_try_pushdown_qual
+ *	  Report whether the query at this level has SubLinks and is simple
+ *	  enough to try lazy SubLink processing and qual pushdown.
+ *
+ * Only a minimal subset of queries is accepted for now; the feature as a
+ * whole is controlled by the enable_lazy_process_sublink switch.
+ */
+bool
+query_has_sublink_try_pushdown_qual(PlannerInfo *root)
+{
+	Query	   *query = root->parse;
+	bool		unsupported;
+
+	/* Without any SubLink there is nothing to process lazily. */
+	if (!query->hasSubLinks)
+		return false;
+
+	/* Anything other than a plain SELECT without these features is rejected. */
+	unsupported = (query->commandType != CMD_SELECT ||
+				   query->hasWindowFuncs ||
+				   query->hasTargetSRFs ||
+				   query->hasRecursive ||
+				   query->hasModifyingCTE ||
+				   query->hasForUpdate ||
+				   query->hasRowSecurity ||
+				   query->setOperations ||
+				   query->havingQual ||
+				   query->cteList != NIL);
+	if (unsupported)
+		return false;
+
+	/* Finally, the switch decides. */
+	return enable_lazy_process_sublink ? true : false;
+}
+
+/*
+ * lazy_process_sublinks
+ *   Expand the SubLinks whose processing was postponed.
+ *
+ * Runs SS_process_sublinks() over root->processed_tlist and
+ * lazy_process_sublink_qual() over every top-level WHERE qual that still
+ * contains a SubLink, then repeats the steps that depend on the expanded
+ * expressions: aggregate preprocessing, add_vars_to_targetlist() for the Vars
+ * pulled from the targetlist, and generate_base_implied_equalities().
+ *
+ * 'single_result_rte' is passed as true by query_planner() on its RTE_RESULT
+ * path; in that case the targetlist-Var and equivalence-class steps are
+ * skipped.
+ *
+ * Raises an ERROR if has_unexpand_sublink(root) is still true afterwards.
+ */
+void
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
+{
+ Query *parse = root->parse;
+ List *tlist_vars;
+
+ /* Exit the function if no unprocessed sublink is recorded. */
+ if (!has_unexpand_sublink(root))
+ return;
+
+ /* Process sublinks in the targetlist (isQual = false). */
+ root->processed_tlist = (List *)SS_process_sublinks(root, (Node *)root->processed_tlist, false, true, true);
+
+ /* Process sublinks in the WHERE clause. */
+ if (parse->jointree && parse->jointree->quals)
+ {
+ FromExpr *f = parse->jointree;
+ List *newquals = NIL;
+ ListCell *l;
+
+ /* The quals are expected to be a List of clauses at this point. */
+ Assert(IsA(f->quals, List));
+ foreach(l, (List *) f->quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ /* Quals with a SubLink are expanded (and distributed); all others are carried over unchanged. */
+ if (checkExprHasSubLink(qual))
+ {
+ qual = lazy_process_sublink_qual(root, qual);
+ newquals = lappend(newquals, qual);
+ }
+ else
+ newquals = lappend(newquals, qual);
+ }
+
+ /* The rebuilt list has one entry per original qual, in the same order. */
+ Assert(list_length((List *)f->quals) == list_length(newquals));
+ f->quals = (Node *)newquals;
+ }
+
+ /* Redo aggregate preprocessing on the targetlist now that its sublinks are expanded. */
+ if(parse->hasAggs)
+ {
+ preprocess_aggrefs(root, (Node *) root->processed_tlist);
+ preprocess_minmax_aggregates(root, true);
+ }
+
+ /* With an empty FROM clause there is no need to process the targetlist Vars or the equivalence classes. */
+ if (!single_result_rte)
+ {
+ /* Collect the Vars (and PlaceHolderVars) of the expanded targetlist and add them to the rels' targetlists. */
+ tlist_vars = pull_var_clause((Node *) root->processed_tlist,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ if (tlist_vars != NIL)
+ {
+ add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+ list_free(tlist_vars);
+ }
+
+ /* Run the equivalence-class deductions again now that the sublink quals have been expanded and distributed. */
+ generate_base_implied_equalities(root);
+ }
+
+ /* Make sure all sublinks are processed. */
+ if (has_unexpand_sublink(root))
+ elog(ERROR, "sublink is not fully expanded yet");
+
+ return;
+}
+
+/*
+ * Saved state for one qual whose SubLink expansion was postponed: the qual
+ * itself plus the arguments distribute_qual_to_rels() was called with, so
+ * that the call can be repeated once the SubLink has been expanded.
+ * Entries are kept in root->unexpand_sublink_expr_list.
+ */
+typedef struct sublink_node
+{
+ Node *expr; /* copy of the unexpanded qual; also the lookup key, compared with equal() */
+ bool below_outer_join;
+ JoinType jointype;
+ Index security_level;
+ Relids qualscope;
+ Relids ojscope;
+ Relids outerjoin_nonnullable;
+ List *postponed_qual_list; /* NOTE(review): stored, but lazy_process_sublink_qual() passes a fresh empty list instead */
+} sublink_node;
+
+/*
+ * remember_qual_info_for_lazy_process_sublink
+ *   Record a qual that still contains an unexpanded SubLink, together with
+ *   the distribute_qual_to_rels() arguments it was seen with, so that
+ *   lazy_process_sublink_qual() can distribute it later.
+ *
+ * Copies of the clause, the relid sets and the postponed-qual list are
+ * stored, and the new entry is appended to root->unexpand_sublink_expr_list.
+ */
+static void
+remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list)
+{
+ sublink_node *sublink_info = palloc0(sizeof(sublink_node));
+
+ /* Copy everything that is not passed by value, so the entry does not share storage with the caller's arguments. */
+ sublink_info->expr= copyObject(clause);
+ sublink_info->below_outer_join = below_outer_join;
+ sublink_info->jointype = jointype;
+ sublink_info->security_level = security_level;
+ sublink_info->qualscope = bms_copy(qualscope);
+ sublink_info->ojscope = bms_copy(ojscope);
+ sublink_info->outerjoin_nonnullable = bms_copy(outerjoin_nonnullable);
+ sublink_info->postponed_qual_list = list_copy_deep(postponed_qual_list);
+
+ root->unexpand_sublink_expr_list = lappend(root->unexpand_sublink_expr_list, sublink_info);
+
+ return;
+}
+
+/*
+ * lazy_process_sublink_qual
+ *	  Expand the SubLinks of one WHERE-clause qual and, if the qual was
+ *	  recorded by remember_qual_info_for_lazy_process_sublink(), distribute
+ *	  the expanded qual using the arguments saved at that time.
+ *
+ * 'node' is the qual that still contains SubLinks.  It is the input to
+ * SS_process_sublinks() and also the key used to find the saved entry in
+ * root->unexpand_sublink_expr_list.
+ *
+ * Returns the expanded qual.  A matching saved entry is removed from the
+ * list once its qual has been distributed.
+ */
+Node *
+lazy_process_sublink_qual(PlannerInfo *root, Node *node)
+{
+	Node	   *qual;
+	sublink_node *sublink_info;
+
+	qual = SS_process_sublinks(root, node, true, true, true);
+	sublink_info = (sublink_node *) search_sublink_from_lazy_process_list(root, node);
+	if (sublink_info)
+	{
+		List	   *postponed_qual_list = NIL;
+
+		distribute_qual_to_rels(root, qual,
+								sublink_info->below_outer_join,
+								sublink_info->jointype,
+								sublink_info->security_level,
+								sublink_info->qualscope,
+								sublink_info->ojscope,
+								sublink_info->outerjoin_nonnullable,
+								&postponed_qual_list);
+
+		/* The expanded qual is not expected to be postponed again. */
+		Assert(postponed_qual_list == NIL);
+
+		/*
+		 * sublink_node is a plain struct, not a Node, so the entry has to be
+		 * removed by pointer identity.  list_delete() compares members with
+		 * equal(), which is only meant for Node trees.
+		 */
+		root->unexpand_sublink_expr_list =
+			list_delete_ptr(root->unexpand_sublink_expr_list, sublink_info);
+	}
+
+	return qual;
+}
+
+/*
+ * search_sublink_from_lazy_process_list
+ *	  Look up the remembered entry whose saved expression equal()s 'node'.
+ *
+ * Returns the first matching sublink_node (as a void pointer), or NULL if
+ * root->unexpand_sublink_expr_list holds no such entry.
+ */
+static void *
+search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node)
+{
+	ListCell   *cell;
+
+	foreach(cell, root->unexpand_sublink_expr_list)
+	{
+		sublink_node *entry = lfirst(cell);
+
+		Assert(entry->expr);
+		if (equal(entry->expr, node))
+			return (void *) entry;
+	}
+
+	return NULL;
+}
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d16669..76fac0323f3 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -49,7 +49,7 @@
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first);
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -70,7 +70,7 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
-preprocess_minmax_aggregates(PlannerInfo *root)
+preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink)
{
Query *parse = root->parse;
FromExpr *jtnode;
@@ -173,9 +173,9 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, lazy_process_sublink))
continue;
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse, lazy_process_sublink))
continue;
/* No indexable path for this aggregate, so fail */
@@ -315,7 +315,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
*/
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first)
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink)
{
PlannerInfo *subroot;
Query *parse;
@@ -352,12 +352,23 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
/* append_rel_list might contain outer Vars? */
subroot->append_rel_list = copyObject(root->append_rel_list);
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+
+ if (lazy_process_sublink)
+ {
+ /* under lazy process sublink, parent root may have some data that child not need, so set it to NULL */
+ subroot->join_info_list = NIL;
+ subroot->eq_classes = NIL;
+ subroot->placeholder_list = NIL;
+ }
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
/* and we haven't made equivalence classes, either */
Assert(subroot->eq_classes == NIL);
/* and we haven't created PlaceHolderInfos, either */
Assert(subroot->placeholder_list == NIL);
+ }
/*----------
* Generate modified query of the form
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 273ac0acf7e..7042c96b09b 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -102,6 +102,8 @@ query_planner(PlannerInfo *root,
Assert(rte != NULL);
if (rte->rtekind == RTE_RESULT)
{
+ lazy_process_sublinks(root, true);
+
/* Make the RelOptInfo for it directly */
final_rel = build_simple_rel(root, varno, NULL);
@@ -197,6 +199,8 @@ query_planner(PlannerInfo *root,
*/
generate_base_implied_equalities(root);
+ lazy_process_sublinks(root, false);
+
/*
* We have completed merging equivalence sets, so it's now possible to
* generate pathkeys in canonical form; so compute query_pathkeys and
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd01ec0526f..fe79751e265 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -64,6 +64,7 @@
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
+#include "utils/guc.h"
/* GUC parameters */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
@@ -128,8 +129,8 @@ typedef struct
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
-static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static Node *preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -641,6 +642,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
+ root->unexpand_sublink_counter = 0;
+ root->unexpand_sublink_expr_list = NIL;
/*
* If there is a WITH list, process each WITH query and either convert it
@@ -784,8 +787,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* part of the targetlist.
*/
parse->targetList = (List *)
- preprocess_expression(root, (Node *) parse->targetList,
- EXPRKIND_TARGET);
+ preprocess_expression_ext(root, (Node *) parse->targetList,
+ EXPRKIND_TARGET, false);
/* Constant-folding might have removed all set-returning functions */
if (parse->hasTargetSRFs)
@@ -807,7 +810,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
- preprocess_qual_conditions(root, (Node *) parse->jointree);
+ preprocess_qual_conditions(root, (Node *) parse->jointree, true);
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
@@ -1049,6 +1052,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
return root;
}
+static Node *
+preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+{
+ return preprocess_expression_ext(root, expr, kind, true);	/* process_sublink = true; handed to SS_process_sublinks() as force_process */
+}
+
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
@@ -1056,7 +1065,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* conditions), a HAVING clause, or a few other things.
*/
static Node *
-preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
@@ -1129,7 +1138,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
- expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
+ expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL), false, process_sublink);
/*
* XXX do not insert anything here unless you have grokked the comments in
@@ -1157,8 +1166,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* Recursively scan the query's jointree and do subquery_planner's
* preprocessing work on each qual condition found therein.
*/
-static void
-preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
{
if (jtnode == NULL)
return;
@@ -1172,17 +1181,19 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
ListCell *l;
foreach(l, f->fromlist)
- preprocess_qual_conditions(root, lfirst(l));
+ preprocess_qual_conditions(root, lfirst(l), false);
- f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- preprocess_qual_conditions(root, j->larg);
- preprocess_qual_conditions(root, j->rarg);
-
+ preprocess_qual_conditions(root, j->larg, false);
+ preprocess_qual_conditions(root, j->rarg, false);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
}
else
@@ -1384,11 +1395,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
- if (parse->hasAggs)
- {
+ if (parse->hasAggs && !has_unexpand_sublink(root))
preprocess_aggrefs(root, (Node *) root->processed_tlist);
+
+ if (parse->hasAggs)
preprocess_aggrefs(root, (Node *) parse->havingQual);
- }
/*
* Locate any window functions in the tlist. (We don't need to look
@@ -1412,8 +1423,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* that is needed in MIN/MAX-optimizable cases will have to be
* duplicated in planagg.c.
*/
- if (parse->hasAggs)
- preprocess_minmax_aggregates(root);
+ if (parse->hasAggs && !has_unexpand_sublink(root))
+ preprocess_minmax_aggregates(root, false);
/*
* Figure out whether there's a hard limit on the number of rows that
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d102..fbf62d50ae6 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,11 +32,13 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/paths.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#include "utils/ruleutils.h"
typedef struct convert_testexpr_context
@@ -49,6 +51,8 @@ typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
+ bool lazy_process;
+ bool force_process;
} process_sublinks_context;
typedef struct finalize_primnode_context
@@ -65,6 +69,13 @@ typedef struct inline_cte_walker_context
Query *ctequery; /* query to substitute */
} inline_cte_walker_context;
+typedef struct equal_expr_info_context	/* findings of equal_expr_analyze_walker() */
+{
+ bool has_unexpected_expr;	/* saw a Param, a FuncExpr, or a second Var of a kind already recorded */
+ bool has_const;	/* saw a Const */
+ Var *outer_var;	/* copy of the first Var with varlevelsup > 0 */
+ Var *inner_var;	/* copy of the first Var with varlevelsup == 0 */
+} equal_expr_info_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
@@ -105,6 +116,11 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
+static Node *replace_vars_mutator(Node *node, void *context);
+static List *find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery);
+static bool equal_expr_analyze_walker(Node *node, void *context);
+static bool equal_expr_safety_check(Node *node, equal_expr_info_context *context);
+
/*
* Get the datatype/typmod/collation of the first column of the plan's output.
@@ -162,7 +178,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool isTopQual)
+ Node *testexpr, bool isTopQual, bool lazy_process)
{
Query *subquery;
bool simple_exists = false;
@@ -173,6 +189,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Plan *plan;
List *plan_params;
Node *result;
+ Query *optimized_subquery = NULL;
+ Query *optimized_subquery_copy = NULL;
/*
* Copy the source Query node. This is a quick and dirty kluge to resolve
@@ -218,8 +236,32 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ if (lazy_process)
+ {
+ List *conditions = NIL;
+ Query *subquery_copy = copyObject(orig_subquery);
+
+ /*
+ * Search sublink query.
+ * If the query contains an outer condition equivalent expression,
+ * this means that there may be external conditions that can be pushed down to optimize the subquery.
+ */
+ conditions = find_equal_conditions_contain_uplevelvar_in_sublink_query(subquery_copy);
+ if (conditions)
+ {
+ /* Search outer queries, and if relevant equivalent expressions are found, push them down into subqueries. */
+ if (try_push_outer_qual_to_sublink_query(root, subquery_copy, conditions))
+ {
+ optimized_subquery = subquery_copy;
+ optimized_subquery_copy = copyObject(optimized_subquery);
+ }
+ list_free(conditions);
+ }
+ }
+
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery,
+ subroot = subquery_planner(root->glob,
+ (optimized_subquery != NULL) ? optimized_subquery : subquery,
root,
false, tuple_fraction);
@@ -256,7 +298,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
List *paramIds;
/* Make a second copy of the original subquery */
- subquery = copyObject(orig_subquery);
+ subquery = copyObject((optimized_subquery_copy != NULL) ? optimized_subquery_copy : orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
@@ -365,7 +407,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
*/
if (IsA(arg, PlaceHolderVar) ||
IsA(arg, Aggref))
- arg = SS_process_sublinks(root, arg, false);
+ arg = SS_process_sublinks(root, arg, false, false, true);
splan->parParam = lappend_int(splan->parParam, pitem->paramId);
splan->args = lappend(splan->args, arg);
@@ -1915,12 +1957,14 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
-SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
+SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
+ context.lazy_process = lazy_process;
+ context.force_process = force_process;
return process_sublinks_mutator(expr, &context);
}
@@ -1930,20 +1974,34 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
process_sublinks_context locContext;
locContext.root = context->root;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
- Node *testexpr;
/*
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
- testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
+ sublink->testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+
+ if (!context->force_process &&
+ query_has_sublink_try_pushdown_qual(context->root))
+ {
+ Assert(context->lazy_process == false);
+ context->root->unexpand_sublink_counter++;
+ return node;
+ }
+
+ if (context->lazy_process)
+ context->root->unexpand_sublink_counter--;
/*
* Now build the SubPlan node and make the expr to return.
@@ -1952,8 +2010,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
(Query *) sublink->subselect,
sublink->subLinkType,
sublink->subLinkId,
- testexpr,
- context->isTopQual);
+ sublink->testexpr,
+ context->isTopQual, locContext.lazy_process);
}
/*
@@ -1978,8 +2036,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
- Assert(!IsA(node, SubPlan));
- Assert(!IsA(node, AlternativeSubPlan));
+ Assert(!IsA(node, SubPlan) || context->lazy_process);
+ Assert(!IsA(node, AlternativeSubPlan) || context->lazy_process);
Assert(!IsA(node, Query));
/*
@@ -2003,6 +2061,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2024,6 +2084,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2989,3 +3051,186 @@ SS_make_initplan_from_plan(PlannerInfo *root,
/* Set costs of SubPlan using info from the plan tree */
cost_subplan(subroot, node, plan);
}
+
+/*
+ * sublink_query_push_qual
+ *	  AND a rewritten copy of "qual" into the WHERE clause of "subquery".
+ *	  Below qual's root, every Var equal() to "outer" becomes a copy of "inner".
+ */
+void
+sublink_query_push_qual(Query *subquery, Node *qual, Var *outer, Var *inner)
+{
+	pushdown_expr_info vars = {.outer = outer, .inner = inner};
+	Node	   *rewritten;
+
+	rewritten = expression_tree_mutator(qual, replace_vars_mutator, (void *) &vars);
+	subquery->jointree->quals = make_and_qual(subquery->jointree->quals, rewritten);
+}
+
+/* Mutator: swap each Var equal() to info->outer for a copy of info->inner. */
+static Node *
+replace_vars_mutator(Node *node, void *context)
+{
+	pushdown_expr_info *info = (pushdown_expr_info *) context;
+
+	/* expression_tree_mutator hands us NULL child links; IsA(NULL) would crash */
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var) && equal(node, (Node *) info->outer))
+		return (Node *) copyObject(info->inner);
+	return expression_tree_mutator(node, replace_vars_mutator, context);
+}
+
+/*
+ * condition_is_safe_pushdown_to_sublink
+ *	  Decide whether the clause of "rinfo" may be copied into a sublink query
+ *	  as a restriction on "var".
+ *
+ * Returns true only when the clause is not pseudoconstant, is not flagged by
+ * contain_leaked_vars(), is accepted by equal_expr_safety_check(), and the
+ * analysis found a Const, no Param or FuncExpr, no upper-level Var, and a
+ * single level-zero Var that is equal() to "var" and has varattno > 0.
+ *
+ * A NULL clause is rejected.
+ */
+bool
+condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var)
+{
+	Node	   *expr = (Node *) rinfo->clause;
+	equal_expr_info_context found = {0};
+
+	if (expr == NULL || rinfo->pseudoconstant || contain_leaked_vars(expr))
+		return false;
+
+	if (!equal_expr_safety_check(expr, &found))
+		return false;
+
+	/* Shape must be "local var = const": nothing else may have been seen. */
+	if (found.inner_var == NULL ||
+		found.outer_var != NULL ||
+		found.has_unexpected_expr ||
+		!found.has_const)
+		return false;
+
+	/* Same relation and column as "var", and not a system column. */
+	return found.inner_var->varattno > 0 && equal(found.inner_var, var);
+}
+
+/*
+ * find_equal_conditions_contain_uplevelvar_in_sublink_query
+ *	  Return a List of palloc'd pushdown_expr_info, one for each top-level
+ *	  AND'ed WHERE condition of the form "upper-level Var = local Var";
+ *	  NIL if there is no WHERE clause or no match.  Works on a copy of the
+ *	  quals, so the query is left untouched.
+ */
+static List *
+find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery)
+{
+	List	   *quals;
+	ListCell   *lc;
+	List	   *conditions = NIL;
+
+	if (orig_subquery->jointree == NULL ||
+		orig_subquery->jointree->quals == NULL)
+		return NIL;
+
+	quals = make_ands_implicit(canonicalize_qual((Expr *)
+								copyObject(orig_subquery->jointree->quals), false));
+	/* A constant-TRUE qual becomes NIL, on which IsA() would dereference NULL. */
+	Assert(quals == NIL || IsA(quals, List));
+	foreach(lc, quals)
+	{
+		equal_expr_info_context context = {0};
+		pushdown_expr_info *expr_info;
+
+		/* Keep only "outer var = inner var" with no Const/Param/FuncExpr. */
+		if (!equal_expr_safety_check((Node *) lfirst(lc), &context) ||
+			context.inner_var == NULL || context.outer_var == NULL ||
+			context.has_unexpected_expr || context.has_const)
+			continue;
+
+		expr_info = palloc0(sizeof(pushdown_expr_info));
+		expr_info->inner = context.inner_var;
+		expr_info->outer = context.outer_var;
+		conditions = lappend(conditions, expr_info);
+	}
+
+	return conditions;
+}
+
+/*
+ * equal_expr_safety_check
+ *	  True if "node" is an OpExpr that get_simple_binary_op_name() names "="
+ *	  and that contains no volatile, mutable or non-strict functions.  In
+ *	  that case "context" is filled in by equal_expr_analyze_walker().
+ *
+ * NOTE(review): this tests only the operator's name, which by itself does
+ * not establish equality semantics -- confirm whether checking the
+ * operator's opfamily membership is needed.
+ */
+static bool
+equal_expr_safety_check(Node *node, equal_expr_info_context *context)
+{
+	const char *opname = IsA(node, OpExpr) ? get_simple_binary_op_name((OpExpr *) node) : NULL;
+
+	if (opname == NULL || strcmp(opname, "=") != 0 || contain_volatile_functions(node) ||
+		contain_mutable_functions(node) || contain_nonstrict_functions(node))
+		return false;
+	(void) equal_expr_analyze_walker(node, context);
+	return true;
+}
+
+/*
+ * equal_expr_analyze_walker
+ *	  expression_tree_walker callback that records in an
+ *	  equal_expr_info_context (passed as "context") what an expression is
+ *	  made of.
+ *
+ * Var:   the first Var with varlevelsup > 0 is copied into outer_var and the
+ *        first Var with varlevelsup == 0 into inner_var; a further Var of a
+ *        kind already recorded sets has_unexpected_expr.
+ * Const: sets has_const.
+ * Param, FuncExpr: set has_unexpected_expr.
+ * Any other node type is simply descended into.
+ *
+ * Returns true, which stops the walk, as soon as has_unexpected_expr is set;
+ * otherwise false.
+ */
+static bool
+equal_expr_analyze_walker(Node *node, void *context)
+{
+	equal_expr_info_context *info = (equal_expr_info_context *) context;
+
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		/* Pick the slot matching this Var's query level. */
+		Var		  **slot = (((Var *) node)->varlevelsup > 0) ?
+			&info->outer_var : &info->inner_var;
+
+		if (*slot == NULL)
+			*slot = (Var *) copyObject(node);
+		else
+			info->has_unexpected_expr = true;
+
+		/* Vars are leaves: report the flag, there is nothing to recurse into. */
+		return info->has_unexpected_expr;
+	}
+
+	if (IsA(node, Const))
+	{
+		info->has_const = true;
+		return false;
+	}
+
+	if (IsA(node, Param) || IsA(node, FuncExpr))
+	{
+		/* Reject the expression and abort the walk. */
+		info->has_unexpected_expr = true;
+		return true;
+	}
+
+	return expression_tree_walker(node, equal_expr_analyze_walker, context);
+}
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 1c4202d864c..0e11ed22522 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -22,6 +22,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "utils/lsyscache.h"
+#include "rewrite/rewriteManip.h"
/* Local functions */
static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
@@ -87,6 +88,10 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
if (!create_new_ph)
elog(ERROR, "too late to create a new PlaceHolderInfo");
+ /* Unprocessed sublink is not accepted, it needs to go through SS_process_sublinks first */
+ if (checkExprHasSubLink((Node *)phv))
+ elog(ERROR, "can not add sublink to placeholder_list");
+
phinfo = makeNode(PlaceHolderInfo);
phinfo->phid = phv->phid;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 1bb25738a52..5c25c2683ec 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -429,7 +429,6 @@ static void resolve_special_varno(Node *node, deparse_context *context,
static Node *find_param_referent(Param *param, deparse_context *context,
deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
static void get_parameter(Param *param, deparse_context *context);
-static const char *get_simple_binary_op_name(OpExpr *expr);
static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
static void appendContextKeyword(deparse_context *context, const char *str,
int indentBefore, int indentAfter, int indentPlus);
@@ -7971,7 +7970,7 @@ get_parameter(Param *param, deparse_context *context)
* helper function for isSimpleNode
* will return single char binary operator name, or NULL if it's not
*/
-static const char *
+const char *
get_simple_binary_op_name(OpExpr *expr)
{
List *args = expr->args;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e91d5a3cfda..1df80bf95fc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -682,6 +682,7 @@ static char *recovery_target_lsn_string;
/* should be static, but commands/variable.c needs to get at this */
char *role_string;
+bool enable_lazy_process_sublink = true;
/*
* Displayable names for context types (enum GucContext)
@@ -971,6 +972,17 @@ static const unit_conversion time_unit_conversion_table[] =
static struct config_bool ConfigureNamesBool[] =
{
+ {
+ {"enable_lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_lazy_process_sublink,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 186e89905b2..1cbdf40477a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -377,6 +377,9 @@ struct PlannerInfo
/* Does this query modify any partition key columns? */
bool partColsUpdated;
+
+ int unexpand_sublink_counter;
+ List *unexpand_sublink_expr_list;
};
@@ -995,6 +998,7 @@ typedef struct EquivalenceClass
bool ec_has_volatile; /* the (sole) member is a volatile expr */
bool ec_below_outer_join; /* equivalence applies below an OJ */
bool ec_broken; /* failed to generate needed clauses? */
+ bool ec_processed;
Index ec_sortref; /* originating sortclause label, or 0 */
Index ec_min_security; /* minimum security_level in ec_sources */
Index ec_max_security; /* maximum security_level in ec_sources */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c2..425b5c68131 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -25,6 +25,12 @@ extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+typedef struct pushdown_expr_info	/* Var pair used when pushing a qual into a sublink query */
+{
+ Var *outer;	/* Var to look for; filled from a Var with varlevelsup > 0 in the sublink's quals */
+ Var *inner;	/* Var substituted for it; filled from a varlevelsup == 0 Var of the sublink query */
+} pushdown_expr_info;
+
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -62,7 +68,7 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
extern void generate_partitionwise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
-
+extern bool try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index bf1adfc52ac..5f018fe2b2b 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -28,12 +28,13 @@ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra);
+ query_pathkeys_callback qp_callback,
+ void *qp_extra);
/*
* prototypes for plan/planagg.c
*/
-extern void preprocess_minmax_aggregates(PlannerInfo *root);
+extern void preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink);
/*
* prototypes for plan/createplan.c
@@ -67,6 +68,8 @@ extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
extern int from_collapse_limit;
extern int join_collapse_limit;
+#define has_unexpand_sublink(root) ((root)->unexpand_sublink_counter != 0)	/* true while the count of SubLinks left unexpanded is nonzero */
+
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
@@ -96,6 +99,9 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Relids nullable_relids,
Index security_level);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
+extern void lazy_process_sublinks(PlannerInfo *root, bool single_result_rte);
+extern bool query_has_sublink_try_pushdown_qual(PlannerInfo *root);
+extern Node *lazy_process_sublink_qual(PlannerInfo *root, Node *node);
/*
* prototypes for plan/analyzejoins.c
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 9a15de50259..14ff94f60e3 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -57,5 +57,6 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
double tuple_fraction);
extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
+extern void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop);
#endif /* PLANNER_H */
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 059bdf941ef..396c4c6117e 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -25,7 +25,7 @@ extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
bool under_not,
Relids available_rels);
extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
-extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
+extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process);
extern void SS_identify_outer_params(PlannerInfo *root);
extern void SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel);
extern void SS_attach_initplans(PlannerInfo *root, Plan *plan);
@@ -36,5 +36,7 @@ extern Param *SS_make_initplan_output_param(PlannerInfo *root,
extern void SS_make_initplan_from_plan(PlannerInfo *root,
PlannerInfo *subroot, Plan *plan,
Param *prm);
+extern bool condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var);
+extern void sublink_query_push_qual(Query *subquery, Node *qual, Var *var, Var *replace);
#endif /* SUBSELECT_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index aa18d304ac0..06f77921449 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -288,6 +288,8 @@ extern int tcp_user_timeout;
extern bool trace_sort;
#endif
+extern bool enable_lazy_process_sublink;
+
/*
* Functions exported by guc.c
*/
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index d333e5e8a56..d4ccca3fe3c 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -42,5 +42,6 @@ extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
extern char *pg_get_statisticsobjdef_string(Oid statextid);
+extern const char *get_simple_binary_op_name(OpExpr *expr);
#endif /* RULEUTILS_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 3a91c144a27..232ee6d15a1 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -926,9 +926,9 @@ WHERE
-> Result
Output: (hjtest_1.b * 5)
-> Hash
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
@@ -974,7 +974,7 @@ WHERE
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
diff --git a/src/test/regress/expected/qual_pushdown_to_sublink.out b/src/test/regress/expected/qual_pushdown_to_sublink.out
new file mode 100644
index 00000000000..52ce9b84325
--- /dev/null
+++ b/src/test/regress/expected/qual_pushdown_to_sublink.out
@@ -0,0 +1,176 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+show enable_lazy_process_sublink;
+ enable_lazy_process_sublink
+-----------------------------
+ on
+(1 row)
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, (SubPlan 1)
+ Filter: ((y.a = 1) AND (y.b = 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ a | b
+---+---
+ 1 | 1
+(1 row)
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a
+ Filter: ((y.a = 1) AND (y.b = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ a
+---
+ 1
+(1 row)
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b, (SubPlan 2)
+ Filter: ((a.a = 1) AND (a.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: max(b.a)
+ -> Result
+ Output: b.a
+ One-Time Filter: ((a.b = 1) AND (a.a = 1))
+ -> Nested Loop
+ Output: b.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: a1.a
+ One-Time Filter: ((b.b = 1) AND (a.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a
+ Filter: ((a1.b = 1) AND (a1.a = 1) AND (clock_timestamp() > 'Fri Dec 11 00:00:00 2020 PST'::timestamp with time zone))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+(24 rows)
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Nested Loop Semi Join
+ Output: a.a, a.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b
+ Filter: ((a.a = 1) AND (a.b = 1))
+ -> Nested Loop Semi Join
+ Output: b.a, b.b
+ -> Nested Loop
+ Output: b.a, b.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a, a1.b
+ Filter: ((a1.b = 1) AND (a1.a = 1))
+(18 rows)
+
+--5 sublink in join on clause can not do pushdown
+EXPLAIN SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b))
+WHERE y.a = 1 AND y.b = 1 AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b);
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop (cost=0.00..99341.36 rows=1 width=4)
+ -> Seq Scan on ab_a1_b1 y (cost=0.00..99297.45 rows=1 width=8)
+ Filter: ((b = 1) AND (a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Aggregate (cost=43.91..43.92 rows=1 width=8)
+ -> Result (cost=0.01..43.91 rows=1 width=0)
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on ab_a1_b1 x (cost=0.01..43.91 rows=1 width=0)
+ Filter: ((b = 1) AND (a = 1))
+ -> Seq Scan on ab_a1_b1 z (cost=0.00..43.90 rows=1 width=8)
+ Filter: ((b = 1) AND (a = 1))
+(11 rows)
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
+NOTICE: drop cascades to table ab
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 07426260330..fd0079a8335 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1063,7 +1063,7 @@ where o.ten = 0;
SubPlan 1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
- Filter: (int4_tbl.f1 <= $0)
+ Filter: (int4_tbl.f1 <= $1)
(14 rows)
select sum(ss.tst::int) from
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 2088857615a..bb7a1dff156 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -110,6 +110,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_incremental_sort | on
enable_indexonlyscan | on
enable_indexscan | on
+ enable_lazy_process_sublink | on
enable_material | on
enable_memoize | on
enable_mergejoin | on
@@ -122,7 +123,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(20 rows)
+(21 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 017e962fed2..58c51d582ad 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -131,3 +131,4 @@ test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats
+test: qual_pushdown_to_sublink
diff --git a/src/test/regress/sql/qual_pushdown_to_sublink.sql b/src/test/regress/sql/qual_pushdown_to_sublink.sql
new file mode 100644
index 00000000000..6ca6d96da03
--- /dev/null
+++ b/src/test/regress/sql/qual_pushdown_to_sublink.sql
@@ -0,0 +1,78 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+
+show enable_lazy_process_sublink;
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+
+--5 sublink in join on clause can not do pushdown
+EXPLAIN SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b))
+WHERE y.a = 1 AND y.b = 1 AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b);
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
--
2.30.1 (Apple Git-130)
On Sat, Dec 11, 2021 at 7:31 AM 曾文旌(义从) <wenjing.zwj@alibaba-inc.com> wrote:
------------------原始邮件 ------------------
*发件人:*Tomas Vondra <tomas.vondra@enterprisedb.com>
*发送时间:*Wed Dec 8 11:26:35 2021
*收件人:*曾文旌(义从) <wenjing.zwj@alibaba-inc.com>, shawn wang <
shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>,
PostgreSQL Hackers <pgsql-hackers@postgresql.org>
*抄送:*wjzeng <wjzeng2012@gmail.com>
*主题:*Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
Hi,
On 12/7/21 10:44, 曾文旌(义从) wrote:
Hi Hackers
For my previous proposal, I developed a prototype and passed
regression testing. It works similarly to subquery's qual pushdown.
We know that sublink expands at the beginning of each level of
query. At this stage, The query's conditions and equivalence classes
are not processed. But after generate_base_implied_equalities the
conditions are processed, which is why qual can push down to
subquery but sublink not.
My POC implementation chose to delay the sublink expansion in the
SELECT clause (targetList) and where clause. Specifically, it is
delayed after generate_base_implied_equalities. Thus, the equivalent
conditions already established in the Up level query can be easily
obtained in the sublink expansion process (make_subplan).
For example, if the up level query has a.id = 10 and the sublink
query has a.id = b.id, then we get b.id = 10 and push it down to the
sublink query. If b is a partitioned table and is partitioned by id,
then a large number of unrelated subpartitions are pruned out. This
optimizes a significant amount of Planner and SQL execution time,
especially if the partitioned table has a large number of
subpartitions and is what I want.
Currently, there were two SQL failures in the regression test,
because the expansion order of sublink was changed, which did not
affect the execution result of SQL.
Look forward to your suggestions on this proposal.
I took a quick look, and while I don't see / can't think of any problems
with delaying it until after generating implied equalities, there seems
to be a number of gaps.
*Thank you for your attention.*
1) Are there any regression tests exercising this modified behavior?
Maybe there are, but if the only changes are due to change in order of
targetlist entries, that doesn't seem like a clear proof.
It'd be good to add a couple tests exercising both the positive and
negative case (i.e. when we can and can't pushdown a qual).
*I added several samples to the regress(qual_pushdown_to_sublink.sql).*
*and I used the partition table to show the plan status of qual being pushed down into sublink.*
*Hopefully this will help you understand the details of this patch. Later, I will add more cases.*
2) apparently, contrib/postgres_fdw does crash like this:
#3 0x000000000077b412 in adjust_appendrel_attrs_mutator
(node=0x13f7ea0, context=0x7fffc3351b30) at appendinfo.c:470
470 Assert(!IsA(node, SubLink));
(gdb) p node
$1 = (Node *) 0x13f7ea0
(gdb) p *node
$2 = {type = T_SubLink}
Backtrace attached.
*For the patch attached in the last email, I passed all the tests under
src/test/regress.*
*As you pointed out, there was a problem with regression under contrib(in
contrib/postgres_fdw).*
*This time I fixed it and the current patch (V2) can pass the
check-world.*
3) various parts of the patch really need at least some comments, like:
- try_push_outer_qual_to_sublink_query really needs some docs
- new stuff at the end of initsplan.c
*Ok, I added some comments and will
add more. If you have questions about any details,*
*please point them out directly.*
4) generate_base_implied_equalities
shouldn't this
if (ec->ec_processed)
;
really be?
if (ec->ec_processed)
continue;
*You are right. I fixed it.*
5) I'm not sure why we need the new ec_processed flag.
*I did this to eliminate duplicate equalities from the two generate_base_implied_equalities calls*
*1) I need the base equivalent expression generated after generate_base_implied_equalities,*
*which is used to pushdown qual to sublink(lazy_process_sublinks)*
*2) The expansion of sublink may result in an equivalent expression with parameters, such as a = $1,*
*which needs to deal with the equivalence classes again.*
*3) So, I added ec_processed and asked to process it again (generate_base_implied_equalities)*
*after the equivalence class changed (add_eq_member/process_equivalence).*
*Maybe you have a better suggestion, please let me know.*
6) So we now have lazy_process_sublink callback? Does that mean we
expand sublinks in two places - sometimes lazily, sometimes not?
*Yes, not all sublink is delayed. Let me explain this:*
*1) I added a GUC switch enable_lazy_process_sublink. If it is turned off, all
lazy process sublink will not happen,*
*qual pushdown to sublink depend on lazy process sublink, which means no quals will be pushed down.*
*2) Even if enable_lazy_process_sublink = true
If Query in this level contains some complex features,*
*sublink in this level query will not try do qual pushdown. (see function
query_has_sublink_try_pushdown_qual).*
*I want to support a minimum subset first. Then consider complex features such as CTE/DML.*
*3) Finally, under conditions 1 and 2,
all kinds of sublink contained in the SELECT clause or*
*WHERE clause will delays expansion and try pushdown qual. The
sublink elsewhere in the SQL statement*
*does not delay process.*
*The current status meets my requirements for
now. Of course, after this scheme is proved to be feasible, maybe*
*we can discuss that all sublinks are processed by overall delay, just like
qual pushdown to subquery.*
*thanks*
*Wenjing*
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Hi,
+ /* The outer var could exist in any of the upper-level queries to
find these roots */
to find these roots -> so find these roots
+ if (has_unexpand_sublink(root) && checkExprHasSubLink(node))
has_unexpand_sublink -> has_unexpanded_sublink
+ if (enable_lazy_process_sublink)
+ return true;
The above can be simplified to:
return enable_lazy_process_sublink;
+ if (checkExprHasSubLink(qual))
+ {
+ qual = lazy_process_sublink_qual(root, qual);
+ newquals = lappend(newquals, qual);
+ }
+ else
+ newquals = lappend(newquals, qual);
Since the lappend() is common to both branches, you can remove the else
clause. In the if block, only call lazy_process_sublink_qual().
+ /* under lazy process sublink, parent root may have some data that
child not need, so set it to NULL */
+ subroot->join_info_list = NIL;
minor correction to the comment above:
under lazy process sublink, parent root may have some data that child
does not need, so set it to NIL
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
Please add a comment explaining the meaning of istop.
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals,
EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
I think the code would be more readable if you replace
the preprocess_expression() call in else branch with call
to preprocess_expression_ext().
+ context->root->unexpand_sublink_counter++;
unexpand_sublink_counter -> unexpanded_sublink_counter++
For sublink_query_push_qual(), the return at the end is not needed.
For condition_is_safe_pushdown_to_sublink, you can initialize context this
way :
+ equal_expr_info_context context = {0};
+ if (cvar && cvar->varattno > 0 && equal(cvar, var))
+ return true;
The last few lines of condition_is_safe_pushdown_to_sublink() can be
written as:
return cvar && cvar->varattno > 0 && equal(cvar, var);
+ if (equal_expr_safety_check(node, &context))
+ {
+ /* It needs to be something like outer var = inner var */
+ if (context.inner_var &&
The nested if blocks can be merged into one if block.
Cheers
------------------原始邮件 ------------------
发件人:Zhihong Yu <zyu@yugabyte.com>
发送时间:Sun Dec 12 01:13:11 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>
抄送:Tomas Vondra <tomas.vondra@enterprisedb.com>, wjzeng <wjzeng2012@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>
主题:Re: Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
On Sat, Dec 11, 2021 at 7:31 AM 曾文旌(义从) <wenjing.zwj@alibaba-inc.com> wrote:
------------------原始邮件 ------------------
发件人:Tomas Vondra <tomas.vondra@enterprisedb.com>
发送时间:Wed Dec 8 11:26:35 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>
抄送:wjzeng <wjzeng2012@gmail.com>
主题:Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
Hi,
On 12/7/21 10:44, 曾文旌(义从) wrote:
Hi Hackers
For my previous proposal, I developed a prototype and passed
regression testing. It works similarly to subquery's qual pushdown.
We know that sublink expands at the beginning of each level of
query. At this stage, The query's conditions and equivalence classes
are not processed. But after generate_base_implied_equalities the
conditions are processed, which is why qual can push down to
subquery but sublink not.
My POC implementation chose to delay the sublink expansion in the
SELECT clause (targetList) and where clause. Specifically, it is
delayed after generate_base_implied_equalities. Thus, the equivalent
conditions already established in the Up level query can be easily
obtained in the sublink expansion process (make_subplan).
For example, if the up level query has a.id = 10 and the sublink
query has a.id = b.id, then we get b.id = 10 and push it down to the
sublink query. If b is a partitioned table and is partitioned by id,
then a large number of unrelated subpartitions are pruned out. This
optimizes a significant amount of Planner and SQL execution time,
especially if the partitioned table has a large number of
subpartitions and is what I want.
Currently, there were two SQL failures in the regression test,
because the expansion order of sublink was changed, which did not
affect the execution result of SQL.
Look forward to your suggestions on this proposal.
I took a quick look, and while I don't see / can't think of any problems
with delaying it until after generating implied equalities, there seems
to be a number of gaps.
Thank you for your attention.
1) Are there any regression tests exercising this modified behavior?
Maybe there are, but if the only changes are due to change in order of
targetlist entries, that doesn't seem like a clear proof.
It'd be good to add a couple tests exercising both the positive and
negative case (i.e. when we can and can't pushdown a qual).
I added several samples to the regress(qual_pushdown_to_sublink.sql).
and I used the partition table to show the plan status of qual being pushed down into sublink.
Hopefully this will help you understand the details of this patch. Later, I will add more cases.
2) apparently, contrib/postgres_fdw does crash like this:
#3 0x000000000077b412 in adjust_appendrel_attrs_mutator
(node=0x13f7ea0, context=0x7fffc3351b30) at appendinfo.c:470
470 Assert(!IsA(node, SubLink));
(gdb) p node
$1 = (Node *) 0x13f7ea0
(gdb) p *node
$2 = {type = T_SubLink}
Backtrace attached.
For the patch attached in the last email, I passed all the tests under src/test/regress.
As you pointed out, there was a problem with regression under contrib(in contrib/postgres_fdw).
This time I fixed it and the current patch (V2) can pass the check-world.
3) various parts of the patch really need at least some comments, like:
- try_push_outer_qual_to_sublink_query really needs some docs
- new stuff at the end of initsplan.c
Ok, I added some comments and will add more. If you have questions about any details,
please point them out directly.
4) generate_base_implied_equalities
shouldn't this
if (ec->ec_processed)
;
really be?
if (ec->ec_processed)
continue;
You are right. I fixed it.
5) I'm not sure why we need the new ec_processed flag.
I did this to eliminate duplicate equalities from the two generate_base_implied_equalities calls
1) I need the base equivalent expression generated after generate_base_implied_equalities,
which is used to pushdown qual to sublink(lazy_process_sublinks)
2) The expansion of sublink may result in an equivalent expression with parameters, such as a = $1,
which needs to deal with the equivalence classes again.
3) So, I added ec_processed and asked to process it again (generate_base_implied_equalities)
after the equivalence class changed (add_eq_member/process_equivalence).
Maybe you have a better suggestion, please let me know.
6) So we now have lazy_process_sublink callback? Does that mean we
expand sublinks in two places - sometimes lazily, sometimes not?
Yes, not all sublink is delayed. Let me explain this:
1) I added a GUC switch enable_lazy_process_sublink. If it is turned off, all lazy process sublink will not happen,
qual pushdown to sublink depend on lazy process sublink, which means no quals will be pushed down.
2) Even if enable_lazy_process_sublink = true If Query in this level contains some complex features,
sublink in this level query will not try do qual pushdown. (see function query_has_sublink_try_pushdown_qual).
I want to support a minimum subset first. Then consider complex features such as CTE/DML.
3) Finally, under conditions 1 and 2, all kinds of sublink contained in the SELECT clause or
WHERE clause will delays expansion and try pushdown qual. The sublink elsewhere in the SQL statement
does not delay process.
The current status meets my requirements for now. Of course, after this scheme is proved to be feasible, maybe
we can discuss that all sublinks are processed by overall delay, just like qual pushdown to subquery.
thanks
Wenjing
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Hi,
+ /* The outer var could exist in any of the upper-level queries to find these roots */
to find these roots -> so find these roots
+ if (has_unexpand_sublink(root) && checkExprHasSubLink(node))
has_unexpand_sublink -> has_unexpanded_sublink
+ if (enable_lazy_process_sublink)
+ return true;
The above can be simplified to:
return enable_lazy_process_sublink;
+ if (checkExprHasSubLink(qual))
+ {
+ qual = lazy_process_sublink_qual(root, qual);
+ newquals = lappend(newquals, qual);
+ }
+ else
+ newquals = lappend(newquals, qual);
Since the lappend() is common to both branches, you can remove the else clause. In the if block, only call lazy_process_sublink_qual().
+ /* under lazy process sublink, parent root may have some data that child not need, so set it to NULL */
+ subroot->join_info_list = NIL;
minor correction to the comment above:
under lazy process sublink, parent root may have some data that child does not need, so set it to NIL
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
Please add a comment explaining the meaning of istop.
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
I think the code would be more readable if you replace the preprocess_expression() call in else branch with call to preprocess_expression_ext().
+ context->root->unexpand_sublink_counter++;
unexpand_sublink_counter -> unexpanded_sublink_counter++
For sublink_query_push_qual(), the return at the end is not needed.
For condition_is_safe_pushdown_to_sublink, you can initialize context this way :
+ equal_expr_info_context context = {0};
I don't understand the benefits of doing this. Please give me some hints.
We can also see a number of memset initializations, such as get_range_partbound_string()
+ if (cvar && cvar->varattno > 0 && equal(cvar, var))
+ return true;
The last few lines of condition_is_safe_pushdown_to_sublink() can be written as:
return cvar && cvar->varattno > 0 && equal(cvar, var);
+ if (equal_expr_safety_check(node, &context))
+ {
+ /* It needs to be something like outer var = inner var */
+ if (context.inner_var &&
The nested if blocks can be merged into one if block.
Cheers
HI Zhihong Yu
Thank you for your attention.
Every suggestion you make makes the patch better.
I have completed the v3 patch according to your suggestions.
Looking forward to your feedback.
Wenjing
Attachments:
0001-poc-pushdown-qual-to-sublink-v3.patch (application/octet-stream) Download
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 296dd75c1b6..22405bef5fc 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -40,8 +40,10 @@
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
+#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
@@ -3895,6 +3897,68 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
list_free(live_children);
}
+bool
+try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions)
+{ /* Try to push quals of the outer query into a sublink's subquery; returns true if at least one qual was pushed. */
+ pushdown_safety_info safetyInfo;
+ ListCell *lc1;
+ bool found = false;
+ bool query_is_pushdown_safe = false;
+
+ if (conditions == NIL) /* no outer var / inner var pairs: nothing can be derived */
+ return false;
+
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeColumns = (bool *)
+ palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+
+ /* Check whether it is safe to push quals into the sublink query; unsafeColumns is only needed for this check and is freed right after. */
+ query_is_pushdown_safe = subquery_is_pushdown_safe(subquery, subquery, &safetyInfo);
+ pfree(safetyInfo.unsafeColumns);
+ if (!query_is_pushdown_safe)
+ return false;
+
+ /*
+ * Each entry of 'conditions' pairs an outer Var with an inner (sublink-local) Var, i.e. outer var = inner var.
+ * For each pair, search the outer relation's baserestrictinfo for quals already generated there (outer var = const).
+ * From outer var = inner var and outer var = const we get inner var = const, which is pushed down to the sublink.
+ */
+ foreach(lc1, conditions)
+ {
+ pushdown_expr_info *expr_info = (pushdown_expr_info *) lfirst(lc1);
+ Index levelsup = 0;
+ RelOptInfo *rel;
+ ListCell *lc2;
+ PlannerInfo *tmproot = parent;
+
+ /* The outer Var may belong to any upper query level: walk up parent_root (varlevelsup - 1) times starting at 'parent'. NOTE(review): Index is presumably unsigned, so this assumes varlevelsup >= 1 on entry -- confirm. */
+ for (levelsup = expr_info->outer->varlevelsup - 1; levelsup > 0; levelsup--)
+ tmproot = tmproot->parent_root;
+
+ /* Reset varlevelsup to 0 (this modifies expr_info->outer in place) so the Var can be compared with that level's baserestrictinfo. */
+ expr_info->outer->varlevelsup = 0;
+
+ /* Look up the base relation of this Var in the located root; skip the pair when there is no restriction qual. */
+ rel = find_base_rel(tmproot, expr_info->outer->varno);
+ if (rel == NULL || rel->baserestrictinfo == NULL)
+ continue;
+
+ foreach(lc2, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+ /* Only quals accepted by this check for the outer Var (intended shape: var = const) are pushed down. */
+ if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+ {
+ /* Hand a copy of the clause plus the outer/inner Var pair to sublink_query_push_qual (intended to turn outer var = const into inner var = const). */
+ sublink_query_push_qual(subquery, (Node *)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
+ found = true;
+ }
+ }
+ }
+
+ return found;
+}
/*****************************************************************************
* DEBUG SUPPORT
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d6..f4aeb716a59 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -388,6 +388,7 @@ process_equivalence(PlannerInfo *root,
restrictinfo->security_level);
ec1->ec_max_security = Max(ec1->ec_max_security,
restrictinfo->security_level);
+ ec1->ec_processed = false;
/* mark the RI as associated with this eclass */
restrictinfo->left_ec = ec1;
restrictinfo->right_ec = ec1;
@@ -450,6 +451,7 @@ process_equivalence(PlannerInfo *root,
ec->ec_min_security = restrictinfo->security_level;
ec->ec_max_security = restrictinfo->security_level;
ec->ec_merged = NULL;
+ ec->ec_processed = false;
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
@@ -574,6 +576,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
ec->ec_relids = bms_add_members(ec->ec_relids, relids);
}
ec->ec_members = lappend(ec->ec_members, em);
+ ec->ec_processed = false;
return em;
}
@@ -711,6 +714,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_min_security = UINT_MAX;
newec->ec_max_security = 0;
newec->ec_merged = NULL;
+ newec->ec_processed = false;
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
@@ -1114,7 +1118,12 @@ generate_base_implied_equalities(PlannerInfo *root)
* Single-member ECs won't generate any deductions, either here or at
* the join level.
*/
- if (list_length(ec->ec_members) > 1)
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
+ else if (list_length(ec->ec_members) > 1)
{
if (ec->ec_has_const)
generate_base_implied_equalities_const(root, ec);
@@ -1151,6 +1160,7 @@ generate_base_implied_equalities(PlannerInfo *root)
rel->has_eclass_joins = true;
}
+ ec->ec_processed = true;
ec_index++;
}
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900f..65ae37bb926 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -30,10 +30,12 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
#include "parser/analyze.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
+#include "utils/guc.h"
/* These parameters are set by GUC */
int from_collapse_limit;
@@ -80,6 +82,16 @@ static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
+static void remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list);
+static void *search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node);
/*****************************************************************************
*
@@ -262,7 +274,16 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
- PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
+ PlaceHolderInfo *phinfo = NULL;
+
+ /*
+ * Since there may be an unexpanded sublink in the targetList,
+ * we'll skip it for now. Don't worry let lazy_process_sublinks do it later.
+ */
+ if (has_unexpanded_sublink(root) && checkExprHasSubLink(node))
+ continue;
+
+ phinfo = find_placeholder_info(root, phv,
create_new_ph);
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
@@ -1621,6 +1642,17 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids nullable_relids;
RestrictInfo *restrictinfo;
+ /* Before lazy transform sublink has not been converted, so backup it */
+ if (checkExprHasSubLink(clause))
+ {
+ remember_qual_info_for_lazy_process_sublink(root, clause, below_outer_join, jointype, security_level,
+ qualscope, ojscope, outerjoin_nonnullable, *postponed_qual_list);
+
+ relids = pull_varnos(root, clause);
+ Assert(bms_is_subset(relids, qualscope));
+ return;
+ }
+
/*
* Retrieve all relids mentioned within the clause.
*/
@@ -2750,3 +2782,182 @@ check_memoizable(RestrictInfo *restrictinfo)
if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr))
restrictinfo->right_hasheqoperator = typentry->eq_opr;
}
+
+/*
+ * Return true when this query level has sublinks and is simple enough to try lazy sublink processing and qual pushdown.
+ * Gated by the enable_lazy_process_sublink setting. Only a minimal subset of queries is accepted for now.
+ */
+bool
+query_has_sublink_try_pushdown_qual(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+
+ if (!parse->hasSubLinks)
+ return false;
+
+ if (parse->commandType != CMD_SELECT || /* reject non-SELECT commands and the query features listed below */
+ parse->hasWindowFuncs ||
+ parse->hasTargetSRFs ||
+ parse->hasRecursive ||
+ parse->hasModifyingCTE ||
+ parse->hasForUpdate ||
+ parse->hasRowSecurity ||
+ parse->setOperations ||
+ parse->havingQual ||
+ parse->cteList != NIL)
+ return false;
+
+ return enable_lazy_process_sublink;
+}
+
+/*
+ * Process the sublinks of this query level whose expansion was postponed (targetlist and WHERE quals).
+ * Afterwards redo aggregate preprocessing, targetlist Var collection and implied-equality generation as needed.
+ */
+void
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
+{
+ Query *parse = root->parse;
+ List *tlist_vars;
+
+ /* Nothing to do when no postponed sublink was recorded. */
+ if (!has_unexpanded_sublink(root))
+ return;
+
+ /* Process the sublinks in the targetlist. */
+ root->processed_tlist = (List *)SS_process_sublinks(root, (Node *)root->processed_tlist, false, true, true);
+
+ /* Process the sublinks in the WHERE clause, one top-level qual at a time. */
+ if (parse->jointree && parse->jointree->quals)
+ {
+ FromExpr *f = parse->jointree;
+ List *newquals = NIL;
+ ListCell *l;
+
+ Assert(IsA(f->quals, List));
+ foreach(l, (List *) f->quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ if (checkExprHasSubLink(qual))
+ qual = lazy_process_sublink_qual(root, qual);
+
+ newquals = lappend(newquals, qual);
+ }
+
+ f->quals = (Node *)newquals;
+ }
+
+ /* Run aggregate preprocessing on the updated targetlist. */
+ if(parse->hasAggs)
+ {
+ preprocess_aggrefs(root, (Node *) root->processed_tlist);
+ preprocess_minmax_aggregates(root, true);
+ }
+
+ /* With an empty FROM clause (single_result_rte, per the original note) the targetlist/equivalence work below is skipped. */
+ if (!single_result_rte)
+ {
+ /* Collect the Vars and PlaceHolderVars of the rewritten targetlist and pass them to add_vars_to_targetlist. */
+ tlist_vars = pull_var_clause((Node *) root->processed_tlist,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ if (tlist_vars != NIL)
+ {
+ add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+ list_free(tlist_vars);
+ }
+
+ generate_base_implied_equalities(root); /* run again: sublink expansion may have changed the equivalence classes */
+ }
+
+ /* Every postponed sublink must have been handled by now; otherwise report an error. */
+ if (has_unexpanded_sublink(root))
+ elog(ERROR, "sublink is not fully expanded yet");
+
+ return;
+}
+
+typedef struct sublink_node /* one qual whose sublink expansion was postponed, plus the saved arguments for distributing it later */
+{
+ Node *expr; /* copy of the qual clause containing the sublink */
+ bool below_outer_join; /* this and the fields below are the values saved from the postponed call */
+ JoinType jointype;
+ Index security_level;
+ Relids qualscope; /* stored as a copy of the caller's set */
+ Relids ojscope; /* stored as a copy of the caller's set */
+ Relids outerjoin_nonnullable; /* stored as a copy of the caller's set */
+ List *postponed_qual_list; /* stored as a deep copy of the caller's list */
+} sublink_node;
+
+/* Save a qual that still contains an unexpanded sublink, together with its distribution arguments, so that distribute_qual_to_rels can be run on it later during lazy sublink processing. */
+static void
+remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list)
+{
+ sublink_node *sublink_info = palloc0(sizeof(sublink_node));
+
+ sublink_info->expr= copyObject(clause); /* pointer-valued arguments are stored as copies, not as the caller's pointers */
+ sublink_info->below_outer_join = below_outer_join;
+ sublink_info->jointype = jointype;
+ sublink_info->security_level = security_level;
+ sublink_info->qualscope = bms_copy(qualscope);
+ sublink_info->ojscope = bms_copy(ojscope);
+ sublink_info->outerjoin_nonnullable = bms_copy(outerjoin_nonnullable);
+ sublink_info->postponed_qual_list = list_copy_deep(postponed_qual_list);
+
+ root->unexpanded_sublink_expr_list = lappend(root->unexpanded_sublink_expr_list, sublink_info); /* append to this root's list of pending entries */
+
+ return;
+}
+
+Node *
+lazy_process_sublink_qual(PlannerInfo *root, Node *node)
+{ /* Process the sublinks of one qual and, if the qual was recorded as pending, distribute the result; returns the processed qual. */
+ Node *qual = NULL;
+ sublink_node *sublink_info = NULL;
+
+ qual = SS_process_sublinks(root, node, true, true, true);
+ sublink_info = (sublink_node *)search_sublink_from_lazy_process_list(root, node); /* lookup uses the original, unprocessed node */
+ if (sublink_info)
+ {
+ List *postponed_qual_list = NIL;
+ distribute_qual_to_rels(root, qual, sublink_info->below_outer_join, sublink_info->jointype, sublink_info->security_level,
+ sublink_info->qualscope, sublink_info->ojscope, sublink_info->outerjoin_nonnullable,
+ &postponed_qual_list); /* a fresh empty list is passed here, not the saved sublink_info->postponed_qual_list */
+
+ Assert(postponed_qual_list == NIL); /* the qual is expected not to be postponed again */
+ root->unexpanded_sublink_expr_list = list_delete(root->unexpanded_sublink_expr_list, sublink_info); /* entry handled: remove it from the pending list */
+ }
+
+ return qual;
+}
+
+static void *
+search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node)
+{ /* Return the first pending entry whose saved expr is equal() to node, or NULL when there is none. */
+ ListCell *lc = NULL;
+ sublink_node *sublink_info = NULL;
+
+ foreach(lc, root->unexpanded_sublink_expr_list)
+ {
+ sublink_node *tmp = lfirst(lc);
+ Assert(tmp->expr);
+ if (equal(tmp->expr, node)) /* structural comparison against the saved copy of the clause */
+ {
+ sublink_info = tmp;
+ break;
+ }
+ }
+
+ return (void *)sublink_info; /* returned as void *; the caller casts it back to sublink_node * */
+}
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d16669..7f83f58479f 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -49,7 +49,7 @@
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first);
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -70,7 +70,7 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
-preprocess_minmax_aggregates(PlannerInfo *root)
+preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink)
{
Query *parse = root->parse;
FromExpr *jtnode;
@@ -173,9 +173,9 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, lazy_process_sublink))
continue;
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse, lazy_process_sublink))
continue;
/* No indexable path for this aggregate, so fail */
@@ -315,7 +315,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
*/
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first)
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink)
{
PlannerInfo *subroot;
Query *parse;
@@ -352,12 +352,23 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
/* append_rel_list might contain outer Vars? */
subroot->append_rel_list = copyObject(root->append_rel_list);
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+
+ if (lazy_process_sublink)
+ {
+ /* under lazy process sublink, parent root may have some data that child does not need, so set it to NIL */
+ subroot->join_info_list = NIL;
+ subroot->eq_classes = NIL;
+ subroot->placeholder_list = NIL;
+ }
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
/* and we haven't made equivalence classes, either */
Assert(subroot->eq_classes == NIL);
/* and we haven't created PlaceHolderInfos, either */
Assert(subroot->placeholder_list == NIL);
+ }
/*----------
* Generate modified query of the form
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 273ac0acf7e..7042c96b09b 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -102,6 +102,8 @@ query_planner(PlannerInfo *root,
Assert(rte != NULL);
if (rte->rtekind == RTE_RESULT)
{
+ lazy_process_sublinks(root, true);
+
/* Make the RelOptInfo for it directly */
final_rel = build_simple_rel(root, varno, NULL);
@@ -197,6 +199,8 @@ query_planner(PlannerInfo *root,
*/
generate_base_implied_equalities(root);
+ lazy_process_sublinks(root, false);
+
/*
* We have completed merging equivalence sets, so it's now possible to
* generate pathkeys in canonical form; so compute query_pathkeys and
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd01ec0526f..91437dcdc8e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -64,6 +64,7 @@
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
+#include "utils/guc.h"
/* GUC parameters */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
@@ -128,8 +129,8 @@ typedef struct
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
-static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static Node *preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -641,6 +642,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
+ root->unexpanded_sublink_counter = 0;
+ root->unexpanded_sublink_expr_list = NIL;
/*
* If there is a WITH list, process each WITH query and either convert it
@@ -784,8 +787,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* part of the targetlist.
*/
parse->targetList = (List *)
- preprocess_expression(root, (Node *) parse->targetList,
- EXPRKIND_TARGET);
+ preprocess_expression_ext(root, (Node *) parse->targetList,
+ EXPRKIND_TARGET, false);
/* Constant-folding might have removed all set-returning functions */
if (parse->hasTargetSRFs)
@@ -807,7 +810,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
- preprocess_qual_conditions(root, (Node *) parse->jointree);
+ preprocess_qual_conditions(root, (Node *) parse->jointree, true);
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
@@ -1049,14 +1052,24 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
return root;
}
+/*
+ * Thin wrapper around preprocess_expression_ext() that always passes
+ * process_sublink = true.
+ */
+static Node *
+preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+{
+ return preprocess_expression_ext(root, expr, kind, true);
+}
+
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
* which can be a targetlist, a WHERE clause (including JOIN/ON
* conditions), a HAVING clause, or a few other things.
+ *
+ * If process_sublink is false, SubLinks found in the expression are not
+ * forced to expand here; their processing may be deferred.
+ * See lazy_process_sublinks().
*/
static Node *
-preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
@@ -1129,7 +1142,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
- expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
+ expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL), false, process_sublink);
/*
* XXX do not insert anything here unless you have grokked the comments in
@@ -1157,8 +1170,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* Recursively scan the query's jointree and do subquery_planner's
* preprocessing work on each qual condition found therein.
*/
-static void
-preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
{
if (jtnode == NULL)
return;
@@ -1172,17 +1185,24 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
ListCell *l;
foreach(l, f->fromlist)
- preprocess_qual_conditions(root, lfirst(l));
+ preprocess_qual_conditions(root, lfirst(l), false);
- f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
+ /*
+ * istop = true means this FromExpr is the top of the jointree, so its quals
+ * are the WHERE clause; istop = false means it was reached by recursion.
+ * For now, only SubLinks in the WHERE clause can be deferred.
+ */
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, true);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- preprocess_qual_conditions(root, j->larg);
- preprocess_qual_conditions(root, j->rarg);
-
+ preprocess_qual_conditions(root, j->larg, false);
+ preprocess_qual_conditions(root, j->rarg, false);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
}
else
@@ -1384,11 +1404,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
- if (parse->hasAggs)
- {
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
preprocess_aggrefs(root, (Node *) root->processed_tlist);
+
+ if (parse->hasAggs)
preprocess_aggrefs(root, (Node *) parse->havingQual);
- }
/*
* Locate any window functions in the tlist. (We don't need to look
@@ -1412,8 +1432,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* that is needed in MIN/MAX-optimizable cases will have to be
* duplicated in planagg.c.
*/
- if (parse->hasAggs)
- preprocess_minmax_aggregates(root);
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
+ preprocess_minmax_aggregates(root, false);
/*
* Figure out whether there's a hard limit on the number of rows that
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d102..44f189b196e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,11 +32,13 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/paths.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#include "utils/ruleutils.h"
typedef struct convert_testexpr_context
@@ -49,6 +51,8 @@ typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
+ bool lazy_process;
+ bool force_process;
} process_sublinks_context;
typedef struct finalize_primnode_context
@@ -65,6 +69,13 @@ typedef struct inline_cte_walker_context
Query *ctequery; /* query to substitute */
} inline_cte_walker_context;
+/*
+ * Working state filled in by equal_expr_analyze_walker() while it scans a
+ * single "=" expression.  Callers zero the struct before each scan.
+ */
+typedef struct equal_expr_info_context
+{
+ bool has_unexpected_expr;	/* saw a Param or FuncExpr, or a second Var of the same kind (outer/inner) */
+ bool has_const;	/* saw at least one Const */
+ Var *outer_var;	/* copy of the first Var with varlevelsup > 0, else NULL */
+ Var *inner_var;	/* copy of the first Var with varlevelsup == 0, else NULL */
+} equal_expr_info_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
@@ -105,6 +116,11 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
+static Node *replace_vars_mutator(Node *node, void *context);
+static List *find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery);
+static bool equal_expr_analyze_walker(Node *node, void *context);
+static bool equal_expr_safety_check(Node *node, equal_expr_info_context *context);
+
/*
* Get the datatype/typmod/collation of the first column of the plan's output.
@@ -162,7 +178,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool isTopQual)
+ Node *testexpr, bool isTopQual, bool lazy_process)
{
Query *subquery;
bool simple_exists = false;
@@ -173,6 +189,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Plan *plan;
List *plan_params;
Node *result;
+ Query *optimized_subquery = NULL;
+ Query *optimized_subquery_copy = NULL;
/*
* Copy the source Query node. This is a quick and dirty kluge to resolve
@@ -218,8 +236,32 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ if (lazy_process)
+ {
+ List *conditions = NIL;
+ Query *subquery_copy = copyObject(orig_subquery);
+
+ /*
+ * Search sublink query.
+ * If the query contains an outer condition equivalent expression,
+ * this means that there may be external conditions that can be pushed down to optimize the subquery.
+ */
+ conditions = find_equal_conditions_contain_uplevelvar_in_sublink_query(subquery_copy);
+ if (conditions)
+ {
+ /* Search outer queries, and if relevant equivalent expressions are found, push them down into subqueries. */
+ if (try_push_outer_qual_to_sublink_query(root, subquery_copy, conditions))
+ {
+ optimized_subquery = subquery_copy;
+ optimized_subquery_copy = copyObject(optimized_subquery);
+ }
+ list_free(conditions);
+ }
+ }
+
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery,
+ subroot = subquery_planner(root->glob,
+ (optimized_subquery != NULL) ? optimized_subquery : subquery,
root,
false, tuple_fraction);
@@ -256,7 +298,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
List *paramIds;
/* Make a second copy of the original subquery */
- subquery = copyObject(orig_subquery);
+ subquery = copyObject((optimized_subquery_copy != NULL) ? optimized_subquery_copy : orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
@@ -365,7 +407,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
*/
if (IsA(arg, PlaceHolderVar) ||
IsA(arg, Aggref))
- arg = SS_process_sublinks(root, arg, false);
+ arg = SS_process_sublinks(root, arg, false, false, true);
splan->parParam = lappend_int(splan->parParam, pitem->paramId);
splan->args = lappend(splan->args, arg);
@@ -1915,12 +1957,14 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
-SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
+SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
+ context.lazy_process = lazy_process;
+ context.force_process = force_process;
return process_sublinks_mutator(expr, &context);
}
@@ -1930,20 +1974,34 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
process_sublinks_context locContext;
locContext.root = context->root;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
- Node *testexpr;
/*
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
- testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
+ sublink->testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+
+ if (!context->force_process &&
+ query_has_sublink_try_pushdown_qual(context->root))
+ {
+ Assert(context->lazy_process == false);
+ context->root->unexpanded_sublink_counter++;
+ return node;
+ }
+
+ if (context->lazy_process)
+ context->root->unexpanded_sublink_counter--;
/*
* Now build the SubPlan node and make the expr to return.
@@ -1952,8 +2010,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
(Query *) sublink->subselect,
sublink->subLinkType,
sublink->subLinkId,
- testexpr,
- context->isTopQual);
+ sublink->testexpr,
+ context->isTopQual, locContext.lazy_process);
}
/*
@@ -1978,8 +2036,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
- Assert(!IsA(node, SubPlan));
- Assert(!IsA(node, AlternativeSubPlan));
+ Assert(!IsA(node, SubPlan) || context->lazy_process);
+ Assert(!IsA(node, AlternativeSubPlan) || context->lazy_process);
Assert(!IsA(node, Query));
/*
@@ -2003,6 +2061,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2024,6 +2084,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2989,3 +3051,185 @@ SS_make_initplan_from_plan(PlannerInfo *root,
/* Set costs of SubPlan using info from the plan tree */
cost_subplan(subroot, node, plan);
}
+
+/*
+ * sublink_query_push_qual
+ *	  AND a rewritten copy of 'qual' into the subquery's top-level quals.
+ *
+ * The copy is produced by expression_tree_mutator() with
+ * replace_vars_mutator(), which substitutes a copy of 'inner' for each Var
+ * equal to 'outer'.  'qual' itself is not modified.
+ */
+void
+sublink_query_push_qual(Query *subquery, Node *qual, Var *outer, Var *inner)
+{
+	pushdown_expr_info subst;
+	Node	   *rewritten;
+
+	subst.inner = inner;
+	subst.outer = outer;
+
+	rewritten = expression_tree_mutator(qual, replace_vars_mutator, (void *) &subst);
+
+	subquery->jointree->quals = make_and_qual(subquery->jointree->quals, rewritten);
+}
+
+/*
+ * replace_vars_mutator
+ *	  expression_tree_mutator() callback: return a copy of the tree in which
+ *	  every Var equal() to info->outer is replaced by a copy of info->inner.
+ *
+ * 'context' points to a pushdown_expr_info.
+ */
+static Node *
+replace_vars_mutator(Node *node, void *context)
+{
+	pushdown_expr_info *info = (pushdown_expr_info *) context;
+
+	/*
+	 * expression_tree_mutator() invokes the callback on optional child
+	 * fields too, so a NULL node is legal here and must be passed through
+	 * rather than dereferenced by IsA().
+	 */
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, Var) && equal(node, (Node *) info->outer))
+		return copyObject((Node *) info->inner);
+
+	return expression_tree_mutator(node, replace_vars_mutator, context);
+}
+
+/*
+ * condition_is_safe_pushdown_to_sublink
+ *	  Report whether a restriction clause has the shape "var = const" for
+ *	  exactly the given 'var'.
+ *
+ * Returns false when the clause is missing, pseudoconstant, contains leaked
+ * Vars, or is rejected by equal_expr_safety_check().  Otherwise the clause
+ * must contain one current-level Var, no upper-level Var, a Const, and no
+ * unexpected node; that Var must be a user column (varattno > 0, so system
+ * columns are not supported for now) and must equal() 'var'.
+ */
+bool
+condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var)
+{
+	Node	   *clause = (Node *) rinfo->clause;
+	equal_expr_info_context shape;
+
+	if (clause == NULL || rinfo->pseudoconstant || contain_leaked_vars(clause))
+		return false;
+
+	memset(&shape, 0, sizeof(equal_expr_info_context));
+	if (!equal_expr_safety_check(clause, &shape))
+		return false;
+
+	/* exactly one current-level Var and no upper-level Var */
+	if (shape.inner_var == NULL || shape.outer_var != NULL)
+		return false;
+
+	/* nothing but Var and Const operands, and a Const must be present */
+	if (shape.has_unexpected_expr || !shape.has_const)
+		return false;
+
+	/* system columns are not supported for now */
+	if (shape.inner_var->varattno <= 0)
+		return false;
+
+	/* the restricted column must be the one the caller asked about */
+	if (equal(shape.inner_var, var))
+		return true;
+
+	return false;
+}
+
+/*
+ * find_equal_conditions_contain_uplevelvar_in_sublink_query
+ *	  Collect the "outer Var = inner Var" conjuncts found in the top-level
+ *	  jointree quals of a sublink's subquery.
+ *
+ * The quals are copied, canonicalized and flattened into an implicit-AND
+ * list; the subquery itself is not modified.  Each conjunct accepted by
+ * equal_expr_safety_check() that contains one upper-level Var, one
+ * current-level Var, no Const and no unexpected node yields one palloc'd
+ * pushdown_expr_info.
+ *
+ * Returns the list of pushdown_expr_info pointers, or NIL if there are none.
+ */
+static List *
+find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery)
+{
+	Node	   *quals;
+	ListCell   *lc;
+	List	   *conditions = NIL;
+
+	if (orig_subquery->jointree == NULL ||
+		orig_subquery->jointree->quals == NULL)
+		return NIL;
+
+	quals = copyObject(orig_subquery->jointree->quals);
+	quals = (Node *) canonicalize_qual((Expr *) quals, false);
+	quals = (Node *) make_ands_implicit((Expr *) quals);
+
+	/*
+	 * make_ands_implicit() returns NIL, which is a NULL pointer, for a
+	 * constant-TRUE qual.  Test for NULL before IsA() so that an
+	 * assert-enabled build does not dereference it; foreach() over NIL
+	 * simply does nothing.
+	 */
+	Assert(quals == NULL || IsA(quals, List));
+	foreach(lc, (List *) quals)
+	{
+		Node	   *node = (Node *) lfirst(lc);
+		equal_expr_info_context context;
+		pushdown_expr_info *expr_info;
+
+		memset(&context, 0, sizeof(equal_expr_info_context));
+		if (!equal_expr_safety_check(node, &context))
+			continue;
+
+		/* It needs to be something like outer var = inner var */
+		if (context.inner_var &&
+			context.outer_var &&
+			context.has_unexpected_expr == false &&
+			context.has_const == false)
+		{
+			expr_info = palloc0(sizeof(pushdown_expr_info));
+			expr_info->inner = context.inner_var;
+			expr_info->outer = context.outer_var;
+			conditions = lappend(conditions, expr_info);
+		}
+	}
+
+	return conditions;
+}
+
+/*
+ * equal_expr_safety_check
+ *	  Decide whether 'node' is a simple "=" comparison that is safe to
+ *	  analyze, and if so fill '*context' with what it contains.
+ *
+ * Returns false, leaving '*context' untouched, unless all of these hold:
+ *	- 'node' is an OpExpr that get_simple_binary_op_name() names "=";
+ *	- it contains no volatile, mutable or non-strict functions;
+ *	- each operand is a bare Var or Const, optionally under RelabelType.
+ *
+ * The operand test matters because the callers treat the result as a plain
+ * "Var = Var" or "Var = Const" relationship and substitute one Var for the
+ * other.  An operand such as "-x" or "x::text" contains a Var but does not
+ * make the two Vars interchangeable, so such clauses are rejected here.
+ *
+ * On success equal_expr_analyze_walker() is run over 'node' and true is
+ * returned; the caller must still inspect the flags in '*context'.
+ *
+ * NOTE(review): the operator is recognized by its printed name only, which
+ * does not by itself prove it has equality semantics.  Confirm whether an
+ * opfamily-based test is wanted.
+ */
+static bool
+equal_expr_safety_check(Node *node, equal_expr_info_context *context)
+{
+	const char *op;
+	ListCell   *lc;
+
+	if (!IsA(node, OpExpr))
+		return false;
+
+	op = get_simple_binary_op_name((OpExpr *) node);
+	if (op == NULL || strcmp(op, "=") != 0)
+		return false;
+
+	if (contain_volatile_functions(node) ||
+		contain_mutable_functions(node) ||
+		contain_nonstrict_functions(node))
+		return false;
+
+	/* Each side must be a plain Var or Const once RelabelType is peeled off. */
+	foreach(lc, ((OpExpr *) node)->args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+
+		while (arg != NULL && IsA(arg, RelabelType))
+			arg = (Node *) ((RelabelType *) arg)->arg;
+
+		if (arg == NULL || !(IsA(arg, Var) || IsA(arg, Const)))
+			return false;
+	}
+
+	equal_expr_analyze_walker(node, context);
+
+	return true;
+}
+
+/*
+ * equal_expr_analyze_walker
+ *	  expression_tree_walker() callback that records, in an
+ *	  equal_expr_info_context, which kinds of leaf nodes an expression holds.
+ *
+ * - Var: the first Var with varlevelsup > 0 is copied into outer_var and the
+ *	 first Var with varlevelsup == 0 into inner_var; a further Var of the
+ *	 same kind sets has_unexpected_expr.  The current value of
+ *	 has_unexpected_expr is returned.
+ * - Const: sets has_const and returns false.
+ * - Param or FuncExpr: sets has_unexpected_expr and returns true.
+ * - NULL: returns false.
+ * - anything else: recurses through expression_tree_walker().
+ */
+static bool
+equal_expr_analyze_walker(Node *node, void *context)
+{
+	equal_expr_info_context *info = (equal_expr_info_context *) context;
+
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		/* pick the slot that matches this Var's query level */
+		Var		  **slot = (((Var *) node)->varlevelsup > 0) ?
+			&info->outer_var : &info->inner_var;
+
+		if (*slot != NULL)
+			info->has_unexpected_expr = true;
+		else
+			*slot = (Var *) copyObject(node);
+
+		return info->has_unexpected_expr;
+	}
+
+	if (IsA(node, Const))
+	{
+		info->has_const = true;
+		return false;
+	}
+
+	if (IsA(node, Param) || IsA(node, FuncExpr))
+	{
+		info->has_unexpected_expr = true;
+		return true;
+	}
+
+	return expression_tree_walker(node, equal_expr_analyze_walker, context);
+}
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 1c4202d864c..0e11ed22522 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -22,6 +22,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "utils/lsyscache.h"
+#include "rewrite/rewriteManip.h"
/* Local functions */
static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
@@ -87,6 +88,10 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
if (!create_new_ph)
elog(ERROR, "too late to create a new PlaceHolderInfo");
+ /* Unprocessed sublink is not accepted, it needs to go through SS_process_sublinks first */
+ if (checkExprHasSubLink((Node *)phv))
+ elog(ERROR, "can not add sublink to placeholder_list");
+
phinfo = makeNode(PlaceHolderInfo);
phinfo->phid = phv->phid;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 1bb25738a52..5c25c2683ec 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -429,7 +429,6 @@ static void resolve_special_varno(Node *node, deparse_context *context,
static Node *find_param_referent(Param *param, deparse_context *context,
deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
static void get_parameter(Param *param, deparse_context *context);
-static const char *get_simple_binary_op_name(OpExpr *expr);
static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
static void appendContextKeyword(deparse_context *context, const char *str,
int indentBefore, int indentAfter, int indentPlus);
@@ -7971,7 +7970,7 @@ get_parameter(Param *param, deparse_context *context)
* helper function for isSimpleNode
* will return single char binary operator name, or NULL if it's not
*/
-static const char *
+const char *
get_simple_binary_op_name(OpExpr *expr)
{
List *args = expr->args;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e91d5a3cfda..1df80bf95fc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -682,6 +682,7 @@ static char *recovery_target_lsn_string;
/* should be static, but commands/variable.c needs to get at this */
char *role_string;
+bool enable_lazy_process_sublink = true;
/*
* Displayable names for context types (enum GucContext)
@@ -971,6 +972,17 @@ static const unit_conversion time_unit_conversion_table[] =
static struct config_bool ConfigureNamesBool[] =
{
+ {
+ {"enable_lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_lazy_process_sublink,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 186e89905b2..bd676569039 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -377,6 +377,9 @@ struct PlannerInfo
/* Does this query modify any partition key columns? */
bool partColsUpdated;
+
+ int unexpanded_sublink_counter;
+ List *unexpanded_sublink_expr_list;
};
@@ -995,6 +998,7 @@ typedef struct EquivalenceClass
bool ec_has_volatile; /* the (sole) member is a volatile expr */
bool ec_below_outer_join; /* equivalence applies below an OJ */
bool ec_broken; /* failed to generate needed clauses? */
+ bool ec_processed;
Index ec_sortref; /* originating sortclause label, or 0 */
Index ec_min_security; /* minimum security_level in ec_sources */
Index ec_max_security; /* maximum security_level in ec_sources */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c2..425b5c68131 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -25,6 +25,12 @@ extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+/*
+ * One "outer Var = inner Var" pairing used when pushing a qual into a
+ * sublink's subquery.
+ */
+typedef struct pushdown_expr_info
+{
+ Var *outer;	/* Var to be replaced when the qual is rewritten */
+ Var *inner;	/* Var whose copy is substituted for 'outer' */
+} pushdown_expr_info;
+
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -62,7 +68,7 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
extern void generate_partitionwise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
-
+extern bool try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index bf1adfc52ac..784dfbfc42e 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -28,12 +28,13 @@ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra);
+ query_pathkeys_callback qp_callback,
+ void *qp_extra);
/*
* prototypes for plan/planagg.c
*/
-extern void preprocess_minmax_aggregates(PlannerInfo *root);
+extern void preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink);
/*
* prototypes for plan/createplan.c
@@ -67,6 +68,8 @@ extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
extern int from_collapse_limit;
extern int join_collapse_limit;
+#define has_unexpanded_sublink(root) ((root)->unexpanded_sublink_counter != 0)
+
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
@@ -96,6 +99,9 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Relids nullable_relids,
Index security_level);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
+extern void lazy_process_sublinks(PlannerInfo *root, bool single_result_rte);
+extern bool query_has_sublink_try_pushdown_qual(PlannerInfo *root);
+extern Node *lazy_process_sublink_qual(PlannerInfo *root, Node *node);
/*
* prototypes for plan/analyzejoins.c
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 9a15de50259..14ff94f60e3 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -57,5 +57,6 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel,
double tuple_fraction);
extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
+extern void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop);
#endif /* PLANNER_H */
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 059bdf941ef..396c4c6117e 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -25,7 +25,7 @@ extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
bool under_not,
Relids available_rels);
extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
-extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
+extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process);
extern void SS_identify_outer_params(PlannerInfo *root);
extern void SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel);
extern void SS_attach_initplans(PlannerInfo *root, Plan *plan);
@@ -36,5 +36,7 @@ extern Param *SS_make_initplan_output_param(PlannerInfo *root,
extern void SS_make_initplan_from_plan(PlannerInfo *root,
PlannerInfo *subroot, Plan *plan,
Param *prm);
+extern bool condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var);
+extern void sublink_query_push_qual(Query *subquery, Node *qual, Var *var, Var *replace);
#endif /* SUBSELECT_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index aa18d304ac0..06f77921449 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -288,6 +288,8 @@ extern int tcp_user_timeout;
extern bool trace_sort;
#endif
+extern bool enable_lazy_process_sublink;
+
/*
* Functions exported by guc.c
*/
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index d333e5e8a56..d4ccca3fe3c 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -42,5 +42,6 @@ extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
extern char *pg_get_statisticsobjdef_string(Oid statextid);
+extern const char *get_simple_binary_op_name(OpExpr *expr);
#endif /* RULEUTILS_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 3a91c144a27..232ee6d15a1 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -926,9 +926,9 @@ WHERE
-> Result
Output: (hjtest_1.b * 5)
-> Hash
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
@@ -974,7 +974,7 @@ WHERE
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
diff --git a/src/test/regress/expected/qual_pushdown_to_sublink.out b/src/test/regress/expected/qual_pushdown_to_sublink.out
new file mode 100644
index 00000000000..bf14fa8a5fb
--- /dev/null
+++ b/src/test/regress/expected/qual_pushdown_to_sublink.out
@@ -0,0 +1,189 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+show enable_lazy_process_sublink;
+ enable_lazy_process_sublink
+-----------------------------
+ on
+(1 row)
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, (SubPlan 1)
+ Filter: ((y.a = 1) AND (y.b = 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ a | b
+---+---
+ 1 | 1
+(1 row)
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a
+ Filter: ((y.a = 1) AND (y.b = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ a
+---
+ 1
+(1 row)
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b, (SubPlan 2)
+ Filter: ((a.a = 1) AND (a.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: max(b.a)
+ -> Result
+ Output: b.a
+ One-Time Filter: ((a.b = 1) AND (a.a = 1))
+ -> Nested Loop
+ Output: b.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: a1.a
+ One-Time Filter: ((b.b = 1) AND (a.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a
+ Filter: ((a1.b = 1) AND (a1.a = 1) AND (clock_timestamp() > 'Fri Dec 11 00:00:00 2020 PST'::timestamp with time zone))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+(24 rows)
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Nested Loop Semi Join
+ Output: a.a, a.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b
+ Filter: ((a.a = 1) AND (a.b = 1))
+ -> Nested Loop Semi Join
+ Output: b.a, b.b
+ -> Nested Loop
+ Output: b.a, b.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a, a1.b
+ Filter: ((a1.b = 1) AND (a1.a = 1))
+(18 rows)
+
+--5 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Nested Loop
+ Output: y.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, y.b
+ Filter: ((y.b = 1) AND (y.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Aggregate
+ Output: count(*)
+ -> Append
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x_1
+ Filter: ((x_1.a = y.a) AND (x_1.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b2 x_2
+ Filter: ((x_2.a = y.a) AND (x_2.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b3 x_3
+ Filter: ((x_3.a = y.a) AND (x_3.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b1 x_4
+ Filter: ((x_4.a = y.a) AND (x_4.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b2 x_5
+ Filter: ((x_5.a = y.a) AND (x_5.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b3 x_6
+ Filter: ((x_6.a = y.a) AND (x_6.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 z
+ Output: z.a, z.b
+ Filter: ((z.b = 1) AND (z.a = 1))
+(24 rows)
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
+NOTICE: drop cascades to table ab
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 07426260330..fd0079a8335 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1063,7 +1063,7 @@ where o.ten = 0;
SubPlan 1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
- Filter: (int4_tbl.f1 <= $0)
+ Filter: (int4_tbl.f1 <= $1)
(14 rows)
select sum(ss.tst::int) from
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 2088857615a..bb7a1dff156 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -110,6 +110,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_incremental_sort | on
enable_indexonlyscan | on
enable_indexscan | on
+ enable_lazy_process_sublink | on
enable_material | on
enable_memoize | on
enable_mergejoin | on
@@ -122,7 +123,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(20 rows)
+(21 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 017e962fed2..58c51d582ad 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -131,3 +131,4 @@ test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats
+test: qual_pushdown_to_sublink
diff --git a/src/test/regress/sql/qual_pushdown_to_sublink.sql b/src/test/regress/sql/qual_pushdown_to_sublink.sql
new file mode 100644
index 00000000000..13252251d37
--- /dev/null
+++ b/src/test/regress/sql/qual_pushdown_to_sublink.sql
@@ -0,0 +1,78 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+
+show enable_lazy_process_sublink;
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+
+--5 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
--
2.30.1 (Apple Git-130)
Fixed a bug found during testing.
Wenjing
------------------原始邮件 ------------------
发件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>
发送时间:Sun Dec 12 20:51:08 2021
收件人:Zhihong Yu <zyu@yugabyte.com>
抄送:Tomas Vondra <tomas.vondra@enterprisedb.com>, wjzeng <wjzeng2012@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>
主题:回复:Re: Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
------------------原始邮件 ------------------
发件人:Zhihong Yu <zyu@yugabyte.com>
发送时间:Sun Dec 12 01:13:11 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>
抄送:Tomas Vondra <tomas.vondra@enterprisedb.com>, wjzeng <wjzeng2012@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>
主题:Re: Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
On Sat, Dec 11, 2021 at 7:31 AM 曾文旌(义从) <wenjing.zwj@alibaba-inc.com> wrote:
------------------原始邮件 ------------------
发件人:Tomas Vondra <tomas.vondra@enterprisedb.com>
发送时间:Wed Dec 8 11:26:35 2021
收件人:曾文旌(义从) <wenjing.zwj@alibaba-inc.com>, shawn wang <shawn.wang.pg@gmail.com>, ggysxcq@gmail.com <ggysxcq@gmail.com>, PostgreSQL Hackers <pgsql-hackers@postgresql.org>
抄送:wjzeng <wjzeng2012@gmail.com>
主题:Re: 回复:Re: Is it worth pushing conditions to sublink/subplan?
Hi,
On 12/7/21 10:44, 曾文旌(义从) wrote:
Hi Hackers
For my previous proposal, I developed a prototype and passed
regression testing. It works similarly to subquery's qual pushdown.
We know that sublink expands at the beginning of each level of
query. At this stage, The query's conditions and equivalence classes
are not processed. But after generate_base_implied_equalities the
conditions are processed, which is why qual can push down to
subquery but not to sublink. My POC implementation chose to delay the sublink expansion in the
SELECT clause (targetList) and where clause. Specifically, it is
delayed after generate_base_implied_equalities. Thus, the equivalent
conditions already established in the Up level query can be easily
obtained in the sublink expansion process (make_subplan). For example, if the upper-level query has a.id = 10 and the sublink
query has a.id = b.id, then we get b.id = 10 and push it down to the
sublink query. If b is a partitioned table and is partitioned by id,
then a large number of unrelated subpartitions are pruned out. This
optimizes a significant amount of Planner and SQL execution time,
especially if the partitioned table has a large number of
subpartitions, which is what I want. Currently, there were two SQL failures in the regression test,
because the expansion order of sublink was changed, which did not
affect the execution result of SQL. I look forward to your suggestions on this proposal.
I took a quick look, and while I don't see / can't think of any problems
with delaying it until after generating implied equalities, there seems
to be a number of gaps.
Thank you for your attention.
1) Are there any regression tests exercising this modified behavior?
Maybe there are, but if the only changes are due to change in order of
targetlist entries, that doesn't seem like a clear proof.
It'd be good to add a couple tests exercising both the positive and
negative case (i.e. when we can and can't pushdown a qual).
I added several samples to the regress(qual_pushdown_to_sublink.sql).
and I used the partition table to show the plan status of qual being pushed down into sublink.
Hopefully this will help you understand the details of this patch. Later, I will add more cases.
2) apparently, contrib/postgres_fdw does crash like this:
#3 0x000000000077b412 in adjust_appendrel_attrs_mutator
(node=0x13f7ea0, context=0x7fffc3351b30) at appendinfo.c:470
470 Assert(!IsA(node, SubLink));
(gdb) p node
$1 = (Node *) 0x13f7ea0
(gdb) p *node
$2 = {type = T_SubLink}
Backtrace attached.
For the patch attached in the last email, I passed all the tests under src/test/regress.
As you pointed out, there was a problem with regression under contrib(in contrib/postgres_fdw).
This time I fixed it and the current patch (V2) can pass the check-world.
3) various parts of the patch really need at least some comments, like:
- try_push_outer_qual_to_sublink_query really needs some docs
- new stuff at the end of initsplan.c
Ok, I added some comments and will add more. If you have questions about any details,
please point them out directly.
4) generate_base_implied_equalities
shouldn't this
if (ec->ec_processed)
;
really be?
if (ec->ec_processed)
continue;
You are right. I fixed it.
5) I'm not sure why we need the new ec_processed flag.
I did this to eliminate duplicate equalities from the two generate_base_implied_equalities calls
1) I need the base equivalent expression generated after generate_base_implied_equalities,
which is used to pushdown qual to sublink(lazy_process_sublinks)
2) The expansion of sublink may result in an equivalent expression with parameters, such as a = $1,
which needs to deal with the equivalence classes again.
3) So, I added ec_processed and asked to process it again (generate_base_implied_equalities)
after the equivalence class changed (add_eq_member/process_equivalence).
Maybe you have a better suggestion, please let me know.
6) So we now have lazy_process_sublink callback? Does that mean we
expand sublinks in two places - sometimes lazily, sometimes not?
Yes, not all sublink is delayed. Let me explain this:
1) I added a GUC switch enable_lazy_process_sublink. If it is turned off, no lazy sublink processing happens;
qual pushdown to sublink depends on lazy sublink processing, which means no quals will be pushed down.
2) Even if enable_lazy_process_sublink = true, if the query at this level contains some complex features,
the sublinks in this query level will not attempt qual pushdown (see function query_has_sublink_try_pushdown_qual).
I want to support a minimum subset first. Then consider complex features such as CTE/DML.
3) Finally, under conditions 1 and 2, all kinds of sublinks contained in the SELECT clause or
WHERE clause have their expansion delayed and try qual pushdown. Sublinks elsewhere in the SQL statement
are not processed lazily.
The current status meets my requirements for now. Of course, after this scheme is proved to be feasible, maybe
we can discuss that all sublinks are processed by overall delay, just like qual pushdown to subquery.
thanks
Wenjing
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Hi,
+ /* The outer var could exist in any of the upper-level queries to find these roots */
to find these roots -> so find these roots
+ if (has_unexpand_sublink(root) && checkExprHasSubLink(node))
has_unexpand_sublink -> has_unexpanded_sublink
+ if (enable_lazy_process_sublink)
+ return true;
The above can be simplified to:
return enable_lazy_process_sublink;
+ if (checkExprHasSubLink(qual))
+ {
+ qual = lazy_process_sublink_qual(root, qual);
+ newquals = lappend(newquals, qual);
+ }
+ else
+ newquals = lappend(newquals, qual);
Since the lappend() is common to both branches, you can remove the else clause. In the if block, only call lazy_process_sublink_qual().
+ /* under lazy process sublink, parent root may have some data that child not need, so set it to NULL */
+ subroot->join_info_list = NIL;
minor correction to the comment above:
under lazy process sublink, parent root may have some data that child does not need, so set it to NIL
+void
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
Please add a comment explaining the meaning of istop.
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
I think the code would be more readable if you replace the preprocess_expression() call in else branch with call to preprocess_expression_ext().
+ context->root->unexpand_sublink_counter++;
unexpand_sublink_counter -> unexpanded_sublink_counter++
For sublink_query_push_qual(), the return at the end is not needed.
For condition_is_safe_pushdown_to_sublink, you can initialize context this way :
+ equal_expr_info_context context = {0};
I don't understand the benefits of doing this. Please give me some hints.
We can also see a number of memset initializations, such as get_range_partbound_string()
+ if (cvar && cvar->varattno > 0 && equal(cvar, var))
+ return true;
The last few lines of condition_is_safe_pushdown_to_sublink() can be written as:
return cvar && cvar->varattno > 0 && equal(cvar, var);
+ if (equal_expr_safety_check(node, &context))
+ {
+ /* It needs to be something like outer var = inner var */
+ if (context.inner_var &&
The nested if blocks can be merged into one if block.
Cheers
HI Zhihong Yu
Thank you for your attention.
Every suggestion you make makes the patch better.
I have completed the v3 patch according to your suggestions.
Looking forward to your feedback.
Wenjing
Attachments:
0001-poc-pushdown-qual-to-sublink-v4.patchapplication/octet-streamDownload
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 296dd75c1b6..22405bef5fc 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -40,8 +40,10 @@
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
+#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
@@ -3895,6 +3897,68 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
list_free(live_children);
}
+bool
+try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions)
+{
+ pushdown_safety_info safetyInfo;
+ ListCell *lc1;
+ bool found = false;
+ bool query_is_pushdown_safe = false;
+ /* Try to push restriction clauses found in the outer query levels into the sublink query "subquery"; returns true if any clause was pushed. */
+ if (conditions == NIL)
+ return false;
+
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeColumns = (bool *)
+ palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+
+ /* Give up unless the sublink query passes subquery_is_pushdown_safe(); the scratch array is only needed for that check. */
+ query_is_pushdown_safe = subquery_is_pushdown_safe(subquery, subquery, &safetyInfo);
+ pfree(safetyInfo.unsafeColumns);
+ if (!query_is_pushdown_safe)
+ return false;
+
+ /*
+ * Each entry of conditions describes an expression found in the sublink of the form (outer var = local var).
+ * For each one, look in the outer query for restriction clauses on that outer var of the form (outer var = const).
+ * From outer var = local var and outer var = const we get local var = const, which is pushed down into the sublink query.
+ */
+ foreach(lc1, conditions)
+ {
+ pushdown_expr_info *expr_info = (pushdown_expr_info *) lfirst(lc1);
+ Index levelsup = 0;
+ RelOptInfo *rel;
+ ListCell *lc2;
+ PlannerInfo *tmproot = parent;
+
+ /* Climb (varlevelsup - 1) parent_root links to the root owning the outer var. NOTE(review): levelsup is an unsigned Index, so this presumably relies on varlevelsup >= 1 -- confirm. */
+ for (levelsup = expr_info->outer->varlevelsup - 1; levelsup > 0; levelsup--)
+ tmproot = tmproot->parent_root;
+
+ /* Reset varlevelsup to 0 so the var can be matched against baserestrictinfo; note this modifies the Var stored in expr_info itself, not a copy. */
+ expr_info->outer->varlevelsup = 0;
+
+ /* Look up the outer var's relation in that root; move on if it has no restriction clauses. */
+ rel = find_base_rel(tmproot, expr_info->outer->varno);
+ if (rel == NULL || rel->baserestrictinfo == NULL)
+ continue;
+
+ foreach(lc2, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+ /* Use only clauses that condition_is_safe_pushdown_to_sublink() accepts for this outer var (intended: var = const). */
+ if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+ {
+ /* Hand a copy of the clause, plus the outer and inner vars, to sublink_query_push_qual(), which is expected to rewrite outer var = const into inner var = const. */
+ sublink_query_push_qual(subquery, (Node *)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
+ found = true;
+ }
+ }
+ }
+
+ return found;
+}
/*****************************************************************************
* DEBUG SUPPORT
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d6..f4aeb716a59 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -388,6 +388,7 @@ process_equivalence(PlannerInfo *root,
restrictinfo->security_level);
ec1->ec_max_security = Max(ec1->ec_max_security,
restrictinfo->security_level);
+ ec1->ec_processed = false;
/* mark the RI as associated with this eclass */
restrictinfo->left_ec = ec1;
restrictinfo->right_ec = ec1;
@@ -450,6 +451,7 @@ process_equivalence(PlannerInfo *root,
ec->ec_min_security = restrictinfo->security_level;
ec->ec_max_security = restrictinfo->security_level;
ec->ec_merged = NULL;
+ ec->ec_processed = false;
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
@@ -574,6 +576,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
ec->ec_relids = bms_add_members(ec->ec_relids, relids);
}
ec->ec_members = lappend(ec->ec_members, em);
+ ec->ec_processed = false;
return em;
}
@@ -711,6 +714,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_min_security = UINT_MAX;
newec->ec_max_security = 0;
newec->ec_merged = NULL;
+ newec->ec_processed = false;
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
@@ -1114,7 +1118,12 @@ generate_base_implied_equalities(PlannerInfo *root)
* Single-member ECs won't generate any deductions, either here or at
* the join level.
*/
- if (list_length(ec->ec_members) > 1)
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
+ else if (list_length(ec->ec_members) > 1)
{
if (ec->ec_has_const)
generate_base_implied_equalities_const(root, ec);
@@ -1151,6 +1160,7 @@ generate_base_implied_equalities(PlannerInfo *root)
rel->has_eclass_joins = true;
}
+ ec->ec_processed = true;
ec_index++;
}
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900f..08ca51b9960 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -30,10 +30,12 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
#include "parser/analyze.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
+#include "utils/guc.h"
/* These parameters are set by GUC */
int from_collapse_limit;
@@ -80,6 +82,16 @@ static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
+static void remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list);
+static void *search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node);
/*****************************************************************************
*
@@ -262,7 +274,16 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
- PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
+ PlaceHolderInfo *phinfo = NULL;
+
+ /*
+ * Since there may be an unexpanded sublink in the targetList,
+ * we'll skip it for now. Don't worry let lazy_process_sublinks do it later.
+ */
+ if (has_unexpanded_sublink(root) && checkExprHasSubLink(node))
+ continue;
+
+ phinfo = find_placeholder_info(root, phv,
create_new_ph);
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
@@ -1621,6 +1642,17 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids nullable_relids;
RestrictInfo *restrictinfo;
+ /* Before lazy transform sublink has not been converted, so backup it */
+ if (checkExprHasSubLink(clause))
+ {
+ remember_qual_info_for_lazy_process_sublink(root, clause, below_outer_join, jointype, security_level,
+ qualscope, ojscope, outerjoin_nonnullable, *postponed_qual_list);
+
+ relids = pull_varnos(root, clause);
+ Assert(bms_is_subset(relids, qualscope));
+ return;
+ }
+
/*
* Retrieve all relids mentioned within the clause.
*/
@@ -2750,3 +2782,184 @@ check_memoizable(RestrictInfo *restrictinfo)
if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr))
restrictinfo->right_hasheqoperator = typentry->eq_opr;
}
+
+/*
+ * Return true when the query at this level has sublinks and may use lazy sublink processing with qual pushdown.
+ * Only a minimal subset is supported: plain SELECTs without the features rejected below; the enable_lazy_process_sublink GUC is the final switch.
+ */
+bool
+query_has_sublink_try_pushdown_qual(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+
+ if (!parse->hasSubLinks)
+ return false;
+
+ if (parse->commandType != CMD_SELECT ||
+ parse->hasWindowFuncs ||
+ parse->hasTargetSRFs ||
+ parse->hasRecursive ||
+ parse->hasModifyingCTE ||
+ parse->hasForUpdate ||
+ parse->hasRowSecurity ||
+ parse->setOperations ||
+ parse->havingQual ||
+ parse->cteList != NIL)
+ return false;
+
+ return enable_lazy_process_sublink; /* the GUC declared in utils/guc.h, so turning it off disables the feature */
+}
+
+/*
+ * Expand the sublinks of this query level whose expansion was deferred.
+ * Runs SS_process_sublinks() over the processed targetlist and the WHERE quals, then updates aggregate info, targetlist vars and equivalence classes.
+ */
+void
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
+{
+ Query *parse = root->parse;
+ List *tlist_vars;
+
+ /* Nothing to do when no unexpanded sublink has been recorded. */
+ if (!has_unexpanded_sublink(root))
+ return;
+
+ /* Expand sublinks in the targetlist; below the top query level, also replace correlation vars. */
+ root->processed_tlist = (List *)SS_process_sublinks(root, (Node *)root->processed_tlist, false, true, true);
+ if (root->query_level > 1)
+ root->processed_tlist = (List *)SS_replace_correlation_vars(root, (Node *)root->processed_tlist);
+
+ /* Expand sublinks in the WHERE clause, one list element at a time; quals without sublinks are kept unchanged. */
+ if (parse->jointree && parse->jointree->quals)
+ {
+ FromExpr *f = parse->jointree;
+ List *newquals = NIL;
+ ListCell *l;
+
+ Assert(IsA(f->quals, List));
+ foreach(l, (List *) f->quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ if (checkExprHasSubLink(qual))
+ qual = lazy_process_sublink_qual(root, qual);
+
+ newquals = lappend(newquals, qual);
+ }
+
+ f->quals = (Node *)newquals;
+ }
+
+ /* Run aggregate preprocessing on the updated targetlist; "true" tells the minmax code it is called from lazy sublink processing. */
+ if(parse->hasAggs)
+ {
+ preprocess_aggrefs(root, (Node *) root->processed_tlist);
+ preprocess_minmax_aggregates(root, true);
+ }
+
+ /* With an empty FROM clause (single_result_rte) there is no need to process targetlist vars or equivalence classes. */
+ if (!single_result_rte)
+ {
+ /* Collect the Vars and PlaceHolderVars of the updated targetlist and pass them to add_vars_to_targetlist(). */
+ tlist_vars = pull_var_clause((Node *) root->processed_tlist,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ if (tlist_vars != NIL)
+ {
+ add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+ list_free(tlist_vars);
+ }
+
+ generate_base_implied_equalities(root);
+ }
+
+ /* Every deferred sublink must have been expanded by now; anything left over is an error. */
+ if (has_unexpanded_sublink(root))
+ elog(ERROR, "sublink is not fully expanded yet");
+
+ return;
+}
+
+typedef struct sublink_node /* one deferred qual containing a sublink, with the saved distribute_qual_to_rels() arguments */
+{
+ Node *expr; /* copy of the qual clause as it was before sublink expansion */
+ bool below_outer_join; /* saved distribute_qual_to_rels() argument */
+ JoinType jointype; /* saved distribute_qual_to_rels() argument */
+ Index security_level; /* saved distribute_qual_to_rels() argument */
+ Relids qualscope; /* copy of the qualscope argument */
+ Relids ojscope; /* copy of the ojscope argument */
+ Relids outerjoin_nonnullable; /* copy of the outerjoin_nonnullable argument */
+ List *postponed_qual_list; /* deep copy of the caller's postponed-qual list at the time of recording */
+} sublink_node;
+
+/* Record a qual containing an unexpanded sublink, with its distribute_qual_to_rels() arguments, for lazy_process_sublink_qual(). */
+static void
+remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list)
+{
+ sublink_node *saved = palloc0(sizeof(sublink_node));
+
+ /* plain values are stored as-is */
+ saved->below_outer_join = below_outer_join;
+ saved->jointype = jointype;
+ saved->security_level = security_level;
+ /* pointer arguments are copied before being stored */
+ saved->expr = copyObject(clause);
+ saved->qualscope = bms_copy(qualscope);
+ saved->ojscope = bms_copy(ojscope);
+ saved->outerjoin_nonnullable = bms_copy(outerjoin_nonnullable);
+ saved->postponed_qual_list = list_copy_deep(postponed_qual_list);
+
+ root->unexpanded_sublink_expr_list = lappend(root->unexpanded_sublink_expr_list, saved);
+}
+
+Node * /* returns the qual with its sublinks expanded by SS_process_sublinks() */
+lazy_process_sublink_qual(PlannerInfo *root, Node *node)
+{
+ Node *qual = NULL;
+ sublink_node *sublink_info = NULL;
+
+ qual = SS_process_sublinks(root, node, true, true, true); /* expand the sublinks in this qual */
+ sublink_info = (sublink_node *)search_sublink_from_lazy_process_list(root, node); /* looked up by the original clause, not the expanded one */
+ if (sublink_info)
+ {
+ List *postponed_qual_list = NIL; /* fresh list; the copy saved in sublink_info is not used here */
+ distribute_qual_to_rels(root, qual, sublink_info->below_outer_join, sublink_info->jointype, sublink_info->security_level,
+ sublink_info->qualscope, sublink_info->ojscope, sublink_info->outerjoin_nonnullable,
+ &postponed_qual_list);
+
+ Assert(postponed_qual_list == NIL); /* the expanded qual is not expected to be postponed */
+ root->unexpanded_sublink_expr_list = list_delete(root->unexpanded_sublink_expr_list, sublink_info); /* entry handled; drop it from the pending list */
+ }
+
+ return qual;
+}
+
+/* Find the recorded sublink_node whose saved clause equal()s node. */
+/* Returns the matching entry, or NULL when no recorded clause matches. */
+static void *
+search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node)
+{
+ ListCell *cell;
+
+ foreach(cell, root->unexpanded_sublink_expr_list)
+ {
+ sublink_node *candidate = lfirst(cell);
+
+ Assert(candidate->expr);
+ /* the first structural match wins */
+ if (equal(candidate->expr, node))
+ return (void *) candidate;
+ }
+
+ return NULL;
+}
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d16669..7f83f58479f 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -49,7 +49,7 @@
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first);
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -70,7 +70,7 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
-preprocess_minmax_aggregates(PlannerInfo *root)
+preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink)
{
Query *parse = root->parse;
FromExpr *jtnode;
@@ -173,9 +173,9 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, lazy_process_sublink))
continue;
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse, lazy_process_sublink))
continue;
/* No indexable path for this aggregate, so fail */
@@ -315,7 +315,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
*/
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first)
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink)
{
PlannerInfo *subroot;
Query *parse;
@@ -352,12 +352,23 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
/* append_rel_list might contain outer Vars? */
subroot->append_rel_list = copyObject(root->append_rel_list);
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
+
+ if (lazy_process_sublink)
+ {
+ /* under lazy process sublink, parent root may have some data that child does not need, so set it to NIL */
+ subroot->join_info_list = NIL;
+ subroot->eq_classes = NIL;
+ subroot->placeholder_list = NIL;
+ }
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
/* and we haven't made equivalence classes, either */
Assert(subroot->eq_classes == NIL);
/* and we haven't created PlaceHolderInfos, either */
Assert(subroot->placeholder_list == NIL);
+ }
/*----------
* Generate modified query of the form
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 273ac0acf7e..7042c96b09b 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -102,6 +102,8 @@ query_planner(PlannerInfo *root,
Assert(rte != NULL);
if (rte->rtekind == RTE_RESULT)
{
+ lazy_process_sublinks(root, true);
+
/* Make the RelOptInfo for it directly */
final_rel = build_simple_rel(root, varno, NULL);
@@ -197,6 +199,8 @@ query_planner(PlannerInfo *root,
*/
generate_base_implied_equalities(root);
+ lazy_process_sublinks(root, false);
+
/*
* We have completed merging equivalence sets, so it's now possible to
* generate pathkeys in canonical form; so compute query_pathkeys and
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd01ec0526f..0b978106868 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -64,6 +64,7 @@
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
+#include "utils/guc.h"
/* GUC parameters */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
@@ -128,8 +129,9 @@ typedef struct
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
-static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
+static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop);
static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static Node *preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -641,6 +643,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
+ root->unexpanded_sublink_counter = 0;
+ root->unexpanded_sublink_expr_list = NIL;
/*
* If there is a WITH list, process each WITH query and either convert it
@@ -784,8 +788,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* part of the targetlist.
*/
parse->targetList = (List *)
- preprocess_expression(root, (Node *) parse->targetList,
- EXPRKIND_TARGET);
+ preprocess_expression_ext(root, (Node *) parse->targetList,
+ EXPRKIND_TARGET, false);
/* Constant-folding might have removed all set-returning functions */
if (parse->hasTargetSRFs)
@@ -807,7 +811,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
- preprocess_qual_conditions(root, (Node *) parse->jointree);
+ preprocess_qual_conditions(root, (Node *) parse->jointree, true);
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
@@ -1049,14 +1053,24 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
return root;
}
+/*
+ * preprocess_expression
+ *	  Thin wrapper around preprocess_expression_ext() that always passes
+ *	  process_sublink = true.
+ */
+static Node *
+preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+{
+ return preprocess_expression_ext(root, expr, kind, true);
+}
+
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
* which can be a targetlist, a WHERE clause (including JOIN/ON
* conditions), a HAVING clause, or a few other things.
+ *
+ * If process_sublink is false, SubLinks found in the expression may be left
+ * unexpanded so that they can be processed later;
+ * see lazy_process_sublinks().
*/
static Node *
-preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
@@ -1129,7 +1143,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
- expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
+ expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL), false, process_sublink);
/*
* XXX do not insert anything here unless you have grokked the comments in
@@ -1158,7 +1172,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* preprocessing work on each qual condition found therein.
*/
static void
-preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
{
if (jtnode == NULL)
return;
@@ -1172,17 +1186,24 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
ListCell *l;
foreach(l, f->fromlist)
- preprocess_qual_conditions(root, lfirst(l));
+ preprocess_qual_conditions(root, lfirst(l), false);
- f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
+ /*
+ * istop = true means that these quals are the WHERE clause;
+ * istop = false means that they are join quals from a JOIN/ON clause.
+ * For now, only sublinks in the WHERE clause can be deferred.
+ */
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, true);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- preprocess_qual_conditions(root, j->larg);
- preprocess_qual_conditions(root, j->rarg);
-
+ preprocess_qual_conditions(root, j->larg, false);
+ preprocess_qual_conditions(root, j->rarg, false);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
}
else
@@ -1384,11 +1405,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
- if (parse->hasAggs)
- {
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
preprocess_aggrefs(root, (Node *) root->processed_tlist);
+
+ if (parse->hasAggs)
preprocess_aggrefs(root, (Node *) parse->havingQual);
- }
/*
* Locate any window functions in the tlist. (We don't need to look
@@ -1412,8 +1433,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* that is needed in MIN/MAX-optimizable cases will have to be
* duplicated in planagg.c.
*/
- if (parse->hasAggs)
- preprocess_minmax_aggregates(root);
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
+ preprocess_minmax_aggregates(root, false);
/*
* Figure out whether there's a hard limit on the number of rows that
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d102..aadf1fd52e0 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,11 +32,13 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/paths.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#include "utils/ruleutils.h"
typedef struct convert_testexpr_context
@@ -49,6 +51,8 @@ typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
+ bool lazy_process;
+ bool force_process;
} process_sublinks_context;
typedef struct finalize_primnode_context
@@ -65,6 +69,13 @@ typedef struct inline_cte_walker_context
Query *ctequery; /* query to substitute */
} inline_cte_walker_context;
+/*
+ * Findings collected by equal_expr_analyze_walker() while scanning one
+ * expression.
+ */
+typedef struct equal_expr_info_context
+{
+ bool has_unexpected_expr; /* saw a Param, a FuncExpr, or a second Var at the same level */
+ bool has_const; /* saw at least one Const */
+ Var *outer_var; /* copy of the first Var with varlevelsup > 0 */
+ Var *inner_var; /* copy of the first Var with varlevelsup == 0 */
+} equal_expr_info_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
@@ -105,6 +116,11 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
+static Node *replace_vars_mutator(Node *node, void *context);
+static List *find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery);
+static bool equal_expr_analyze_walker(Node *node, void *context);
+static bool equal_expr_safety_check(Node *node, equal_expr_info_context *context);
+
/*
* Get the datatype/typmod/collation of the first column of the plan's output.
@@ -162,7 +178,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool isTopQual)
+ Node *testexpr, bool isTopQual, bool lazy_process)
{
Query *subquery;
bool simple_exists = false;
@@ -173,6 +189,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Plan *plan;
List *plan_params;
Node *result;
+ Query *optimized_subquery = NULL;
+ Query *optimized_subquery_copy = NULL;
/*
* Copy the source Query node. This is a quick and dirty kluge to resolve
@@ -218,8 +236,32 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ if (lazy_process)
+ {
+ List *conditions = NIL;
+ Query *subquery_copy = copyObject(orig_subquery);
+
+ /*
+ * Search sublink query.
+ * If the query contains an outer condition equivalent expression,
+ * this means that there may be external conditions that can be pushed down to optimize the subquery.
+ */
+ conditions = find_equal_conditions_contain_uplevelvar_in_sublink_query(subquery_copy);
+ if (conditions)
+ {
+ /* Search outer queries, and if relevant equivalent expressions are found, push them down into subqueries. */
+ if (try_push_outer_qual_to_sublink_query(root, subquery_copy, conditions))
+ {
+ optimized_subquery = subquery_copy;
+ optimized_subquery_copy = copyObject(optimized_subquery);
+ }
+ list_free(conditions);
+ }
+ }
+
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery,
+ subroot = subquery_planner(root->glob,
+ (optimized_subquery != NULL) ? optimized_subquery : subquery,
root,
false, tuple_fraction);
@@ -256,7 +298,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
List *paramIds;
/* Make a second copy of the original subquery */
- subquery = copyObject(orig_subquery);
+ subquery = copyObject((optimized_subquery_copy != NULL) ? optimized_subquery_copy : orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
@@ -365,7 +407,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
*/
if (IsA(arg, PlaceHolderVar) ||
IsA(arg, Aggref))
- arg = SS_process_sublinks(root, arg, false);
+ arg = SS_process_sublinks(root, arg, false, false, true);
splan->parParam = lappend_int(splan->parParam, pitem->paramId);
splan->args = lappend(splan->args, arg);
@@ -1915,12 +1957,14 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
-SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
+SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
+ context.lazy_process = lazy_process;
+ context.force_process = force_process;
return process_sublinks_mutator(expr, &context);
}
@@ -1930,20 +1974,34 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
process_sublinks_context locContext;
locContext.root = context->root;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
- Node *testexpr;
/*
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
- testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
+ sublink->testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+
+ if (!context->force_process &&
+ query_has_sublink_try_pushdown_qual(context->root))
+ {
+ Assert(context->lazy_process == false);
+ context->root->unexpanded_sublink_counter++;
+ return node;
+ }
+
+ if (context->lazy_process)
+ context->root->unexpanded_sublink_counter--;
/*
* Now build the SubPlan node and make the expr to return.
@@ -1952,8 +2010,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
(Query *) sublink->subselect,
sublink->subLinkType,
sublink->subLinkId,
- testexpr,
- context->isTopQual);
+ sublink->testexpr,
+ context->isTopQual, locContext.lazy_process);
}
/*
@@ -1978,8 +2036,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
- Assert(!IsA(node, SubPlan));
- Assert(!IsA(node, AlternativeSubPlan));
+ Assert(!IsA(node, SubPlan) || context->lazy_process);
+ Assert(!IsA(node, AlternativeSubPlan) || context->lazy_process);
Assert(!IsA(node, Query));
/*
@@ -2003,6 +2061,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2024,6 +2084,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2989,3 +3051,184 @@ SS_make_initplan_from_plan(PlannerInfo *root,
/* Set costs of SubPlan using info from the plan tree */
cost_subplan(subroot, node, plan);
}
+
+/*
+ * sublink_query_push_qual
+ *	  AND a rewritten copy of 'qual' into the WHERE clause of 'subquery'.
+ *
+ * Every Var in 'qual' that is equal() to 'outer' is replaced by a copy of
+ * 'inner'; the rest of the expression is copied unchanged.  'qual' itself is
+ * not modified.
+ */
+void
+sublink_query_push_qual(Query *subquery, Node *qual, Var *outer, Var *inner)
+{
+	pushdown_expr_info context;
+	Node	   *new_qual;
+
+	context.outer = outer;
+	context.inner = inner;
+
+	/*
+	 * Enter through the mutator itself rather than expression_tree_mutator(),
+	 * so that a qual whose top node is the Var to replace is handled too.
+	 */
+	new_qual = replace_vars_mutator(qual, (void *) &context);
+	subquery->jointree->quals = make_and_qual(subquery->jointree->quals, new_qual);
+}
+
+/*
+ * replace_vars_mutator
+ *	  expression_tree_mutator() callback: substitute a copy of info->inner
+ *	  for each Var equal() to info->outer.
+ *
+ * 'context' points to a pushdown_expr_info.
+ */
+static Node *
+replace_vars_mutator(Node *node, void *context)
+{
+	pushdown_expr_info *info = (pushdown_expr_info *) context;
+
+	/*
+	 * expression_tree_mutator() invokes the callback for absent (NULL)
+	 * sub-expressions as well; IsA() must not be applied to those.
+	 */
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, Var) && equal(node, (Node *) info->outer))
+		return copyObject((Node *) info->inner);
+
+	return expression_tree_mutator(node, replace_vars_mutator, context);
+}
+
+/*
+ * condition_is_safe_pushdown_to_sublink
+ *	  Report whether the clause of 'rinfo' has the shape "var = const" and so
+ *	  may be pushed into a sublink's subquery.
+ *
+ * The clause is rejected when it is absent, pseudoconstant, contains leaked
+ * Vars, or fails equal_expr_safety_check().  Beyond that it must reference
+ * exactly one same-level Var and no upper-level Var, contain a Const, contain
+ * no Param or FuncExpr, and its Var must be a user column (varattno > 0) that
+ * is equal() to 'var'.
+ */
+bool
+condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var)
+{
+	Node	   *expr = (Node *) rinfo->clause;
+	equal_expr_info_context info;
+
+	if (expr == NULL || rinfo->pseudoconstant)
+		return false;
+
+	if (contain_leaked_vars(expr))
+		return false;
+
+	memset(&info, 0, sizeof(info));
+	if (!equal_expr_safety_check(expr, &info))
+		return false;
+
+	/* System columns are not supported for now, hence varattno > 0. */
+	return info.inner_var != NULL &&
+		info.outer_var == NULL &&
+		!info.has_unexpected_expr &&
+		info.has_const &&
+		info.inner_var->varattno > 0 &&
+		equal(info.inner_var, var);
+}
+
+/*
+ * find_equal_conditions_contain_uplevelvar_in_sublink_query
+ *	  Collect the top-level WHERE conjuncts of 'orig_subquery' that have the
+ *	  shape "outer var = inner var".
+ *
+ * The WHERE clause is copied, canonicalized and flattened into an implicit-AND
+ * list before being examined, so 'orig_subquery' is not modified.
+ *
+ * Returns a list of palloc'd pushdown_expr_info, one per matching conjunct,
+ * or NIL when the subquery has no jointree, no quals, or no match.
+ */
+static List *
+find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery)
+{
+	List	   *result = NIL;
+	List	   *clauses;
+	Expr	   *where;
+	ListCell   *cell;
+
+	if (orig_subquery->jointree == NULL ||
+		orig_subquery->jointree->quals == NULL)
+		return NIL;
+
+	where = (Expr *) copyObject(orig_subquery->jointree->quals);
+	where = canonicalize_qual(where, false);
+	clauses = make_ands_implicit(where);
+
+	foreach(cell, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(cell);
+		equal_expr_info_context info;
+		pushdown_expr_info *pair;
+
+		memset(&info, 0, sizeof(info));
+		if (!equal_expr_safety_check(clause, &info))
+			continue;
+
+		/* Need exactly one Var from each level, and nothing else. */
+		if (info.inner_var == NULL ||
+			info.outer_var == NULL ||
+			info.has_unexpected_expr ||
+			info.has_const)
+			continue;
+
+		pair = palloc0(sizeof(pushdown_expr_info));
+		pair->inner = info.inner_var;
+		pair->outer = info.outer_var;
+		result = lappend(result, pair);
+	}
+
+	return result;
+}
+
+/*
+ * equal_expr_safety_check
+ *	  Screen 'node' as a candidate "=" comparison and, if it passes, record
+ *	  what it references in '*context'.
+ *
+ * Returns false, leaving '*context' untouched, unless 'node' is an OpExpr
+ * whose simple binary operator name is "=" and which contains no volatile,
+ * mutable or nonstrict functions.  Otherwise the expression is scanned with
+ * equal_expr_analyze_walker() and true is returned; the caller must still
+ * inspect '*context' to decide whether the shape is acceptable.
+ *
+ * NOTE(review): the operator is matched by name only; confirm that this is
+ * sufficient to identify an equality operator.
+ */
+static bool
+equal_expr_safety_check(Node *node, equal_expr_info_context *context)
+{
+	const char *opname;
+
+	if (!IsA(node, OpExpr))
+		return false;
+
+	opname = get_simple_binary_op_name((OpExpr *) node);
+	if (opname == NULL)
+		return false;
+	if (strcmp(opname, "=") != 0)
+		return false;
+
+	if (contain_volatile_functions(node))
+		return false;
+	if (contain_mutable_functions(node))
+		return false;
+	if (contain_nonstrict_functions(node))
+		return false;
+
+	/* The walker's own return value only signals an early stop. */
+	(void) equal_expr_analyze_walker(node, context);
+
+	return true;
+}
+
+/*
+ * equal_expr_analyze_walker
+ *	  expression_tree_walker() callback that fills an equal_expr_info_context.
+ *
+ * The first Var with varlevelsup > 0 is copied into outer_var and the first
+ * Var with varlevelsup == 0 into inner_var; a second Var of either kind sets
+ * has_unexpected_expr.  A Const sets has_const.  A Param or FuncExpr sets
+ * has_unexpected_expr.
+ *
+ * Returns true to stop the walk, which happens once has_unexpected_expr is
+ * set and a Var, Param or FuncExpr is visited.
+ */
+static bool
+equal_expr_analyze_walker(Node *node, void *context)
+{
+	equal_expr_info_context *info = (equal_expr_info_context *) context;
+
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		/* Pick the slot that matches the Var's query level. */
+		Var		  **slot = (((Var *) node)->varlevelsup > 0) ?
+			&info->outer_var : &info->inner_var;
+
+		if (*slot != NULL)
+			info->has_unexpected_expr = true;
+		else
+			*slot = (Var *) copyObject(node);
+
+		return info->has_unexpected_expr;
+	}
+
+	if (IsA(node, Const))
+	{
+		info->has_const = true;
+		return false;
+	}
+
+	if (IsA(node, Param) || IsA(node, FuncExpr))
+	{
+		info->has_unexpected_expr = true;
+		return true;
+	}
+
+	return expression_tree_walker(node, equal_expr_analyze_walker, context);
+}
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 1c4202d864c..0e11ed22522 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -22,6 +22,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "utils/lsyscache.h"
+#include "rewrite/rewriteManip.h"
/* Local functions */
static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
@@ -87,6 +88,10 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
if (!create_new_ph)
elog(ERROR, "too late to create a new PlaceHolderInfo");
+ /* Unprocessed sublink is not accepted, it needs to go through SS_process_sublinks first */
+ if (checkExprHasSubLink((Node *)phv))
+ elog(ERROR, "can not add sublink to placeholder_list");
+
phinfo = makeNode(PlaceHolderInfo);
phinfo->phid = phv->phid;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 8da525c715b..f847e898e68 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -429,7 +429,6 @@ static void resolve_special_varno(Node *node, deparse_context *context,
static Node *find_param_referent(Param *param, deparse_context *context,
deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
static void get_parameter(Param *param, deparse_context *context);
-static const char *get_simple_binary_op_name(OpExpr *expr);
static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
static void appendContextKeyword(deparse_context *context, const char *str,
int indentBefore, int indentAfter, int indentPlus);
@@ -7983,7 +7982,7 @@ get_parameter(Param *param, deparse_context *context)
* helper function for isSimpleNode
* will return single char binary operator name, or NULL if it's not
*/
-static const char *
+const char *
get_simple_binary_op_name(OpExpr *expr)
{
List *args = expr->args;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 7b030463013..cd4f6fd51b4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -684,6 +684,7 @@ static char *recovery_target_lsn_string;
/* should be static, but commands/variable.c needs to get at this */
char *role_string;
+bool lazy_process_sublink = true;
/*
* Displayable names for context types (enum GucContext)
@@ -973,6 +974,17 @@ static const unit_conversion time_unit_conversion_table[] =
static struct config_bool ConfigureNamesBool[] =
{
+ {
+ {"lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &lazy_process_sublink,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 324d92880b5..fd8f6c995d6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -377,6 +377,9 @@ struct PlannerInfo
/* Does this query modify any partition key columns? */
bool partColsUpdated;
+
+ int unexpanded_sublink_counter;
+ List *unexpanded_sublink_expr_list;
};
@@ -995,6 +998,7 @@ typedef struct EquivalenceClass
bool ec_has_volatile; /* the (sole) member is a volatile expr */
bool ec_below_outer_join; /* equivalence applies below an OJ */
bool ec_broken; /* failed to generate needed clauses? */
+ bool ec_processed;
Index ec_sortref; /* originating sortclause label, or 0 */
Index ec_min_security; /* minimum security_level in ec_sources */
Index ec_max_security; /* maximum security_level in ec_sources */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c2..425b5c68131 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -25,6 +25,12 @@ extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+/*
+ * A pair of Vars used when pushing an outer qual into a sublink's subquery:
+ * occurrences of 'outer' in the qual are replaced by copies of 'inner'.
+ */
+typedef struct pushdown_expr_info
+{
+ Var *outer; /* Var to look for in the qual being pushed down */
+ Var *inner; /* Var substituted for each match */
+} pushdown_expr_info;
+
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -62,7 +68,7 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
extern void generate_partitionwise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
-
+extern bool try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index bf1adfc52ac..784dfbfc42e 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -28,12 +28,13 @@ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra);
+ query_pathkeys_callback qp_callback,
+ void *qp_extra);
/*
* prototypes for plan/planagg.c
*/
-extern void preprocess_minmax_aggregates(PlannerInfo *root);
+extern void preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink);
/*
* prototypes for plan/createplan.c
@@ -67,6 +68,8 @@ extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
extern int from_collapse_limit;
extern int join_collapse_limit;
+#define has_unexpanded_sublink(root) ((root)->unexpanded_sublink_counter != 0)
+
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
@@ -96,6 +99,9 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Relids nullable_relids,
Index security_level);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
+extern void lazy_process_sublinks(PlannerInfo *root, bool single_result_rte);
+extern bool query_has_sublink_try_pushdown_qual(PlannerInfo *root);
+extern Node *lazy_process_sublink_qual(PlannerInfo *root, Node *node);
/*
* prototypes for plan/analyzejoins.c
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 059bdf941ef..396c4c6117e 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -25,7 +25,7 @@ extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
bool under_not,
Relids available_rels);
extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
-extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
+extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process);
extern void SS_identify_outer_params(PlannerInfo *root);
extern void SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel);
extern void SS_attach_initplans(PlannerInfo *root, Plan *plan);
@@ -36,5 +36,7 @@ extern Param *SS_make_initplan_output_param(PlannerInfo *root,
extern void SS_make_initplan_from_plan(PlannerInfo *root,
PlannerInfo *subroot, Plan *plan,
Param *prm);
+extern bool condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var);
+extern void sublink_query_push_qual(Query *subquery, Node *qual, Var *var, Var *replace);
#endif /* SUBSELECT_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index aa18d304ac0..92bfc1b806e 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -288,6 +288,8 @@ extern int tcp_user_timeout;
extern bool trace_sort;
#endif
+extern bool lazy_process_sublink;
+
/*
* Functions exported by guc.c
*/
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index d333e5e8a56..d4ccca3fe3c 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -42,5 +42,6 @@ extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
extern char *pg_get_statisticsobjdef_string(Oid statextid);
+extern const char *get_simple_binary_op_name(OpExpr *expr);
#endif /* RULEUTILS_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 3a91c144a27..232ee6d15a1 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -926,9 +926,9 @@ WHERE
-> Result
Output: (hjtest_1.b * 5)
-> Hash
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
@@ -974,7 +974,7 @@ WHERE
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
diff --git a/src/test/regress/expected/qual_pushdown_to_sublink.out b/src/test/regress/expected/qual_pushdown_to_sublink.out
new file mode 100644
index 00000000000..b4d68ac3a12
--- /dev/null
+++ b/src/test/regress/expected/qual_pushdown_to_sublink.out
@@ -0,0 +1,221 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+show lazy_process_sublink;
+ lazy_process_sublink
+----------------------
+ on
+(1 row)
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, (SubPlan 1)
+ Filter: ((y.a = 1) AND (y.b = 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ a | b
+---+---
+ 1 | 1
+(1 row)
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a
+ Filter: ((y.a = 1) AND (y.b = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ a
+---
+ 1
+(1 row)
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b, (SubPlan 2)
+ Filter: ((a.a = 1) AND (a.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: max(b.a)
+ -> Result
+ Output: b.a
+ One-Time Filter: ((a.b = 1) AND (a.a = 1))
+ -> Nested Loop
+ Output: b.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: a1.a
+ One-Time Filter: ((b.b = 1) AND (a.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a
+ Filter: ((a1.b = 1) AND (a1.a = 1) AND (clock_timestamp() > 'Fri Dec 11 00:00:00 2020 PST'::timestamp with time zone))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+(24 rows)
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Nested Loop Semi Join
+ Output: a.a, a.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b
+ Filter: ((a.a = 1) AND (a.b = 1))
+ -> Nested Loop Semi Join
+ Output: b.a, b.b
+ -> Nested Loop
+ Output: b.a, b.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a, a1.b
+ Filter: ((a1.b = 1) AND (a1.a = 1))
+(18 rows)
+
+--5 aggrefs with multiple agglevelsup
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT
+ (SELECT
+ (SELECT sum(foo.a + bar.b) FROM ab jazz WHERE jazz.a=foo.a AND jazz.b=foo.b)
+ FROM ab bar WHERE bar.a=foo.a AND bar.b=foo.b
+ ) FROM ab foo WHERE foo.a=1 AND foo.b=1 GROUP BY a, b;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Group
+ Output: (SubPlan 2), foo.a, foo.b
+ Group Key: foo.a, foo.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 foo
+ Output: foo.a, foo.b
+ Filter: ((foo.a = 1) AND (foo.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: (SubPlan 1)
+ -> Result
+ Output: bar.b
+ One-Time Filter: ((foo.b = 1) AND (foo.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 bar
+ Output: bar.b
+ Filter: ((bar.b = 1) AND (bar.a = 1))
+ SubPlan 1
+ -> Result
+ Output: sum((foo.a + bar.b))
+ One-Time Filter: ((foo.b = 1) AND (foo.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 jazz
+ Filter: ((jazz.b = 1) AND (jazz.a = 1))
+(21 rows)
+
+--6 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Nested Loop
+ Output: y.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, y.b
+ Filter: ((y.b = 1) AND (y.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Aggregate
+ Output: count(*)
+ -> Append
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x_1
+ Filter: ((x_1.a = y.a) AND (x_1.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b2 x_2
+ Filter: ((x_2.a = y.a) AND (x_2.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b3 x_3
+ Filter: ((x_3.a = y.a) AND (x_3.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b1 x_4
+ Filter: ((x_4.a = y.a) AND (x_4.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b2 x_5
+ Filter: ((x_5.a = y.a) AND (x_5.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b3 x_6
+ Filter: ((x_6.a = y.a) AND (x_6.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 z
+ Output: z.a, z.b
+ Filter: ((z.b = 1) AND (z.a = 1))
+(24 rows)
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
+NOTICE: drop cascades to table ab
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 4e8ddc70613..2df4d6e15b5 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1063,7 +1063,7 @@ where o.ten = 0;
SubPlan 1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
- Filter: (int4_tbl.f1 <= $0)
+ Filter: (int4_tbl.f1 <= $1)
(14 rows)
select sum(ss.tst::int) from
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 5b0c73d7e37..3060801f2f1 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -134,3 +134,4 @@ test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats
+test: qual_pushdown_to_sublink
diff --git a/src/test/regress/sql/qual_pushdown_to_sublink.sql b/src/test/regress/sql/qual_pushdown_to_sublink.sql
new file mode 100644
index 00000000000..375e9aef91b
--- /dev/null
+++ b/src/test/regress/sql/qual_pushdown_to_sublink.sql
@@ -0,0 +1,86 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+
+show lazy_process_sublink;
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+
+--5 aggrefs with multiple agglevelsup
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT
+ (SELECT
+ (SELECT sum(foo.a + bar.b) FROM ab jazz WHERE jazz.a=foo.a AND jazz.b=foo.b)
+ FROM ab bar WHERE bar.a=foo.a AND bar.b=foo.b
+ ) FROM ab foo WHERE foo.a=1 AND foo.b=1 GROUP BY a, b;
+
+--6 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
--
2.32.0 (Apple Git-132)
On Thu, Dec 23, 2021 at 3:52 AM 曾文旌(义从) <wenjing.zwj@alibaba-inc.com> wrote:
Fixed a bug found during testing.
Wenjing
Hi,
+ if (condition_is_safe_pushdown_to_sublink(rinfo,
expr_info->outer))
+ {
+ /* replace qual expr from outer var = const to var = const
and push down to sublink query */
+ sublink_query_push_qual(subquery, (Node
*)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
Since sublink_query_push_qual() is always guarded
by condition_is_safe_pushdown_to_sublink(), it seems
sublink_query_push_qual() can be folded into
condition_is_safe_pushdown_to_sublink().
For generate_base_implied_equalities():
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
+ else if (list_length(ec->ec_members) > 1)
Minor comment: the keyword `else` can be omitted (due to `continue` above).
+ * Since there may be an unexpanded sublink in the targetList,
+ * we'll skip it for now.
Since there may be an -> If there is an
+ {"lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
Looking at existing examples from src/backend/utils/misc/guc.c,
enable_lazy_sublink_processing seems to be consistent with existing guc
variable naming.
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
lazy_process_sublinks -> lazily_process_sublinks
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
Indentation for the else block is off.
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals,
EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression_ext(root, f->quals,
EXPRKIND_QUAL, true);
The above can be written as:
+ f->quals = preprocess_expression_ext(root, f->quals,
EXPRKIND_QUAL, !istop);
For find_equal_conditions_contain_uplevelvar_in_sublink_query():
+ context.has_unexpected_expr == false &&
`!context.has_unexpected_expr` should suffice
equal_expr_safety_check -> is_equal_expr_safe
Cheers
I corrected it according to your suggestion.
thanks
Wenjing.
Zhihong Yu <zyu@yugabyte.com> 于2021年12月25日周六 02:26写道:
Show quoted text
On Thu, Dec 23, 2021 at 3:52 AM 曾文旌(义从) <wenjing.zwj@alibaba-inc.com> wrote:
Fixed a bug found during testing.
Wenjing
Hi,
+ if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+ {
+ /* replace qual expr from outer var = const to var = const and push down to sublink query */
+ sublink_query_push_qual(subquery, (Node *)copyObject(rinfo->clause), expr_info->outer, expr_info->inner);
Since sublink_query_push_qual() is always guarded
by condition_is_safe_pushdown_to_sublink(), it seems
sublink_query_push_qual() can be folded into
condition_is_safe_pushdown_to_sublink().
For generate_base_implied_equalities():
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
+ else if (list_length(ec->ec_members) > 1)
Minor comment: the keyword `else` can be omitted (due to `continue` above).
+ * Since there may be an unexpanded sublink in the targetList,
+ * we'll skip it for now.
Since there may be an -> If there is an
+ {"lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
Looking at existing examples from src/backend/utils/misc/guc.c,
enable_lazy_sublink_processing seems to be consistent with existing guc
variable naming.
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
lazy_process_sublinks -> lazily_process_sublinks
+ else
+ {
/* There shouldn't be any OJ info to translate, as yet */
Assert(subroot->join_info_list == NIL);
Indentation for the else block is off.
+ if (istop)
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, false);
+ else
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, true);
The above can be written as:
+ f->quals = preprocess_expression_ext(root, f->quals,
EXPRKIND_QUAL, !istop);
For find_equal_conditions_contain_uplevelvar_in_sublink_query():
+ context.has_unexpected_expr == false &&
`!context.has_unexpected_expr` should suffice
equal_expr_safety_check -> is_equal_expr_safe
Cheers
Attachments:
0001-poc-pushdown-qual-to-sublink-v5.patchapplication/octet-stream; name=0001-poc-pushdown-qual-to-sublink-v5.patchDownload
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 296dd75c1b6..22405bef5fc 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -40,8 +40,10 @@
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"
+#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "optimizer/subselect.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partbounds.h"
@@ -3895,6 +3897,68 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
list_free(live_children);
}
+/*
+ * try_push_outer_qual_to_sublink_query
+ *    Try to push restriction clauses of outer-query relations down into the
+ *    subquery of a correlated sublink.
+ *
+ * 'parent' is the PlannerInfo used to resolve outer Vars whose varlevelsup
+ * is 1; Vars from higher levels are resolved by following parent_root links.
+ * 'subquery' is the sublink's Query.
+ * 'conditions' is a list of pushdown_expr_info entries, each pairing a Var of
+ * an upper query level ("outer") with a Var local to the subquery ("inner").
+ *
+ * For each pair, the baserestrictinfo list of the outer Var's relation is
+ * scanned.  Every clause accepted by condition_is_safe_pushdown_to_sublink()
+ * is copied and handed to sublink_query_push_qual() together with the outer
+ * and inner Var, so that "outer var = const" can become "inner var = const"
+ * inside the subquery.
+ *
+ * Returns true if at least one clause was pushed down.  Returns false when
+ * 'conditions' is empty, when subquery_is_pushdown_safe() rejects the
+ * subquery, or when no suitable clause was found.
+ *
+ * Side effect: varlevelsup of each processed expr_info->outer is reset to 0
+ * in place, so that the Var can be compared with the Vars stored in the
+ * outer relation's restriction clauses.
+ */
+bool
+try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions)
+{
+    pushdown_safety_info safetyInfo;
+    ListCell   *lc1;
+    bool        found = false;
+    bool        query_is_pushdown_safe;
+
+    if (conditions == NIL)
+        return false;
+
+    memset(&safetyInfo, 0, sizeof(safetyInfo));
+    safetyInfo.unsafeColumns = (bool *)
+        palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+
+    /* Check whether pushing quals into the sublink query is safe at all. */
+    query_is_pushdown_safe = subquery_is_pushdown_safe(subquery, subquery, &safetyInfo);
+    pfree(safetyInfo.unsafeColumns);
+    if (!query_is_pushdown_safe)
+        return false;
+
+    /*
+     * The sublink contains conditions of the form (outer var = local var).
+     * Look in the outer query for restrictions of the form
+     * (outer var = const); from the two we can derive (local var = const)
+     * and push that into the sublink query.
+     */
+    foreach(lc1, conditions)
+    {
+        pushdown_expr_info *expr_info = (pushdown_expr_info *) lfirst(lc1);
+        Index       levelsup;
+        RelOptInfo *rel;
+        ListCell   *lc2;
+        PlannerInfo *tmproot = parent;
+
+        /*
+         * varlevelsup is unsigned.  A value of 0 (for instance an entry that
+         * was already flattened by an earlier pass) would make
+         * "varlevelsup - 1" wrap around and walk off the end of the
+         * parent_root chain, so skip such entries.
+         */
+        if (expr_info->outer->varlevelsup == 0)
+            continue;
+
+        /* The outer var may belong to any upper-level query; find its root. */
+        for (levelsup = expr_info->outer->varlevelsup - 1;
+             levelsup > 0 && tmproot != NULL;
+             levelsup--)
+            tmproot = tmproot->parent_root;
+
+        /* Fewer planner levels than varlevelsup claims: nothing to look at. */
+        if (tmproot == NULL)
+            continue;
+
+        /* Flatten varlevelsup so the Var matches those in baserestrictinfo. */
+        expr_info->outer->varlevelsup = 0;
+
+        /* Is there any restriction on the relation this var belongs to? */
+        rel = find_base_rel(tmproot, expr_info->outer->varno);
+        if (rel == NULL || rel->baserestrictinfo == NIL)
+            continue;
+
+        foreach(lc2, rel->baserestrictinfo)
+        {
+            RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+            /* Only "var = const" style quals that are safe to push down. */
+            if (condition_is_safe_pushdown_to_sublink(rinfo, expr_info->outer))
+            {
+                /*
+                 * Push a copy of the clause; sublink_query_push_qual() is
+                 * given both Vars so it can substitute inner for outer.
+                 */
+                sublink_query_push_qual(subquery, (Node *) copyObject(rinfo->clause),
+                                        expr_info->outer, expr_info->inner);
+                found = true;
+            }
+        }
+    }
+
+    return found;
+}
/*****************************************************************************
* DEBUG SUPPORT
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 6f1abbe47d6..bc8cea7fa24 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -388,6 +388,7 @@ process_equivalence(PlannerInfo *root,
restrictinfo->security_level);
ec1->ec_max_security = Max(ec1->ec_max_security,
restrictinfo->security_level);
+ ec1->ec_processed = false;
/* mark the RI as associated with this eclass */
restrictinfo->left_ec = ec1;
restrictinfo->right_ec = ec1;
@@ -450,6 +451,7 @@ process_equivalence(PlannerInfo *root,
ec->ec_min_security = restrictinfo->security_level;
ec->ec_max_security = restrictinfo->security_level;
ec->ec_merged = NULL;
+ ec->ec_processed = false;
em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
false, item1_type);
em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
@@ -574,6 +576,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
ec->ec_relids = bms_add_members(ec->ec_relids, relids);
}
ec->ec_members = lappend(ec->ec_members, em);
+ ec->ec_processed = false;
return em;
}
@@ -711,6 +714,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
newec->ec_min_security = UINT_MAX;
newec->ec_max_security = 0;
newec->ec_merged = NULL;
+ newec->ec_processed = false;
if (newec->ec_has_volatile && sortref == 0) /* should not happen */
elog(ERROR, "volatile EquivalenceClass has no sortref");
@@ -1114,6 +1118,11 @@ generate_base_implied_equalities(PlannerInfo *root)
* Single-member ECs won't generate any deductions, either here or at
* the join level.
*/
+ if (ec->ec_processed)
+ {
+ ec_index++;
+ continue;
+ }
if (list_length(ec->ec_members) > 1)
{
if (ec->ec_has_const)
@@ -1151,6 +1160,7 @@ generate_base_implied_equalities(PlannerInfo *root)
rel->has_eclass_joins = true;
}
+ ec->ec_processed = true;
ec_index++;
}
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index f6a202d900f..2548e39e842 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -30,10 +30,12 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/subselect.h"
#include "parser/analyze.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
+#include "utils/guc.h"
/* These parameters are set by GUC */
int from_collapse_limit;
@@ -80,6 +82,16 @@ static void check_mergejoinable(RestrictInfo *restrictinfo);
static void check_hashjoinable(RestrictInfo *restrictinfo);
static void check_memoizable(RestrictInfo *restrictinfo);
+static void remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+ Node *clause,
+ bool below_outer_join,
+ JoinType jointype,
+ Index security_level,
+ Relids qualscope,
+ Relids ojscope,
+ Relids outerjoin_nonnullable,
+ List *postponed_qual_list);
+static void *search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node);
/*****************************************************************************
*
@@ -262,7 +274,16 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
else if (IsA(node, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) node;
- PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
+ PlaceHolderInfo *phinfo = NULL;
+
+ /*
+ * If there is an unexpanded sublink in the targetList,
+ * we'll skip it for now. Don't worry let lazy_process_sublinks do it later.
+ */
+ if (has_unexpanded_sublink(root) && checkExprHasSubLink(node))
+ continue;
+
+ phinfo = find_placeholder_info(root, phv,
create_new_ph);
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
@@ -1621,6 +1642,17 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
Relids nullable_relids;
RestrictInfo *restrictinfo;
+ /* Before lazy transform sublink has not been converted, so backup it */
+ if (checkExprHasSubLink(clause))
+ {
+ remember_qual_info_for_lazy_process_sublink(root, clause, below_outer_join, jointype, security_level,
+ qualscope, ojscope, outerjoin_nonnullable, *postponed_qual_list);
+
+ relids = pull_varnos(root, clause);
+ Assert(bms_is_subset(relids, qualscope));
+ return;
+ }
+
/*
* Retrieve all relids mentioned within the clause.
*/
@@ -2750,3 +2782,184 @@ check_memoizable(RestrictInfo *restrictinfo)
if (OidIsValid(typentry->hash_proc) && OidIsValid(typentry->eq_opr))
restrictinfo->right_hasheqoperator = typentry->eq_opr;
}
+
+/*
+ * query_has_sublink_try_pushdown_qual
+ *    Report whether the sublinks of this query level should be processed
+ *    lazily so that outer quals can be pushed into them.
+ *
+ * Only a deliberately small class of queries qualifies: plain SELECTs that
+ * contain sublinks and use none of the features tested below.  For those,
+ * the answer is whatever the lazy_process_sublink GUC says.
+ */
+bool
+query_has_sublink_try_pushdown_qual(PlannerInfo *root)
+{
+    Query      *query = root->parse;
+
+    /* Without sublinks there is nothing to defer. */
+    if (!query->hasSubLinks)
+        return false;
+
+    /* Only SELECT is supported. */
+    if (query->commandType != CMD_SELECT)
+        return false;
+
+    /* Target-list features that are not handled yet. */
+    if (query->hasWindowFuncs || query->hasTargetSRFs)
+        return false;
+
+    /* WITH clauses of any flavour. */
+    if (query->hasRecursive || query->hasModifyingCTE || query->cteList != NIL)
+        return false;
+
+    /* Row locking and row-level security. */
+    if (query->hasForUpdate || query->hasRowSecurity)
+        return false;
+
+    /* Set operations and HAVING. */
+    if (query->setOperations || query->havingQual)
+        return false;
+
+    /* Eligible query: the GUC switch decides. */
+    return lazy_process_sublink;
+}
+
+/*
+ * lazy_process_sublinks
+ *    Expand the sublinks of this query level whose processing was deferred.
+ *
+ * Does nothing when has_unexpanded_sublink(root) reports nothing pending.
+ * Otherwise the sublinks in root->processed_tlist and in the top-level
+ * jointree quals are run through SS_process_sublinks() (the quals via
+ * lazy_process_sublink_qual()), and the bookkeeping that depends on the
+ * expanded expressions (aggregate preprocessing, target-list Vars,
+ * implied equalities) is redone.
+ *
+ * single_result_rte: when true, the target-list Var collection and the
+ * extra generate_base_implied_equalities() pass are skipped.
+ * query_planner() passes true from its RTE_RESULT branch and false after
+ * its regular generate_base_implied_equalities() call.
+ *
+ * Raises an error if a sublink is still recorded as unexpanded at the end.
+ */
+void
+lazy_process_sublinks(PlannerInfo *root, bool single_result_rte)
+{
+ Query *parse = root->parse;
+ List *tlist_vars;
+
+ /* Nothing to do if no unprocessed sublink has been recorded. */
+ if (!has_unexpanded_sublink(root))
+ return;
+
+ /*
+ * Process the sublinks in the target list.  Below the top query level the
+ * result is additionally passed through SS_replace_correlation_vars().
+ */
+ root->processed_tlist = (List *)SS_process_sublinks(root, (Node *)root->processed_tlist, false, true, true);
+ if (root->query_level > 1)
+ root->processed_tlist = (List *)SS_replace_correlation_vars(root, (Node *)root->processed_tlist);
+
+ /*
+ * Process the sublinks in the WHERE clause.  The quals are expected to be
+ * a List (asserted below); it is rebuilt element by element, and only the
+ * elements that contain a sublink are sent to lazy_process_sublink_qual().
+ */
+ if (parse->jointree && parse->jointree->quals)
+ {
+ FromExpr *f = parse->jointree;
+ List *newquals = NIL;
+ ListCell *l;
+
+ Assert(IsA(f->quals, List));
+ foreach(l, (List *) f->quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+
+ if (checkExprHasSubLink(qual))
+ qual = lazy_process_sublink_qual(root, qual);
+
+ newquals = lappend(newquals, qual);
+ }
+
+ f->quals = (Node *)newquals;
+ }
+
+ /*
+ * Process aggregate functions, now that the target list has its final
+ * form.  The "true" tells preprocess_minmax_aggregates() that it is being
+ * called from lazy sublink processing.
+ */
+ if(parse->hasAggs)
+ {
+ preprocess_aggrefs(root, (Node *) root->processed_tlist);
+ preprocess_minmax_aggregates(root, true);
+ }
+
+ /* With an empty FROM clause there is no need to process the target list Vars. */
+ if (!single_result_rte)
+ {
+ /* Add the Vars and PlaceHolderVars of the mutated target list to the rels' target lists. */
+ tlist_vars = pull_var_clause((Node *) root->processed_tlist,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ if (tlist_vars != NIL)
+ {
+ add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+ list_free(tlist_vars);
+ }
+
+ generate_base_implied_equalities(root);
+ }
+
+ /* Make sure all sublinks have been processed by now. */
+ if (has_unexpanded_sublink(root))
+ elog(ERROR, "sublink is not fully expanded yet");
+
+ return;
+}
+
+typedef struct sublink_node /* one deferred sublink qual plus the distribute_qual_to_rels() arguments saved with it */
+{
+ Node *expr; /* copy of the qual before sublink expansion; compared with equal() to find this entry again */
+ bool below_outer_join; /* saved argument, passed back to distribute_qual_to_rels() */
+ JoinType jointype; /* saved argument, passed back to distribute_qual_to_rels() */
+ Index security_level; /* saved argument, passed back to distribute_qual_to_rels() */
+ Relids qualscope; /* private bms_copy() of the caller's set */
+ Relids ojscope; /* private bms_copy() of the caller's set */
+ Relids outerjoin_nonnullable; /* private bms_copy() of the caller's set */
+ List *postponed_qual_list; /* deep copy of the caller's list; NOTE(review): lazy_process_sublink_qual() does not read it back, it passes a fresh empty list */
+} sublink_node;
+
+/*
+ * remember_qual_info_for_lazy_process_sublink
+ *    Record a qual that still contains an unexpanded sublink, together with
+ *    the distribute_qual_to_rels() arguments it arrived with, so that the
+ *    qual can be distributed later during lazy sublink processing.
+ *
+ * The clause, the relid sets and the postponed-qual list are all copied;
+ * the new entry is appended to root->unexpanded_sublink_expr_list.
+ */
+static void
+remember_qual_info_for_lazy_process_sublink(PlannerInfo *root,
+                                            Node *clause,
+                                            bool below_outer_join,
+                                            JoinType jointype,
+                                            Index security_level,
+                                            Relids qualscope,
+                                            Relids ojscope,
+                                            Relids outerjoin_nonnullable,
+                                            List *postponed_qual_list)
+{
+    sublink_node *saved = palloc0(sizeof(sublink_node));
+
+    /* Plain values are stored as they are. */
+    saved->below_outer_join = below_outer_join;
+    saved->jointype = jointype;
+    saved->security_level = security_level;
+
+    /* Everything reached through a pointer gets a private copy. */
+    saved->expr = copyObject(clause);
+    saved->qualscope = bms_copy(qualscope);
+    saved->ojscope = bms_copy(ojscope);
+    saved->outerjoin_nonnullable = bms_copy(outerjoin_nonnullable);
+    saved->postponed_qual_list = list_copy_deep(postponed_qual_list);
+
+    root->unexpanded_sublink_expr_list =
+        lappend(root->unexpanded_sublink_expr_list, saved);
+}
+
+/*
+ * lazy_process_sublink_qual
+ *    Expand the sublinks in one deferred qual and, if the qual was recorded
+ *    by remember_qual_info_for_lazy_process_sublink(), distribute the
+ *    expanded qual with the arguments saved at that time.
+ *
+ * 'node' is the qual as it looked before sublink expansion; it is the key
+ * used to find the saved entry.  Returns the qual after
+ * SS_process_sublinks().  A matching entry is removed from
+ * root->unexpanded_sublink_expr_list once the qual has been distributed.
+ */
+Node *
+lazy_process_sublink_qual(PlannerInfo *root, Node *node)
+{
+    Node       *qual;
+    sublink_node *sublink_info;
+
+    qual = SS_process_sublinks(root, node, true, true, true);
+    sublink_info = (sublink_node *) search_sublink_from_lazy_process_list(root, node);
+    if (sublink_info)
+    {
+        List       *postponed_qual_list = NIL;
+
+        distribute_qual_to_rels(root, qual,
+                                sublink_info->below_outer_join,
+                                sublink_info->jointype,
+                                sublink_info->security_level,
+                                sublink_info->qualscope,
+                                sublink_info->ojscope,
+                                sublink_info->outerjoin_nonnullable,
+                                &postponed_qual_list);
+
+        /* The expanded qual is expected to be placed, not postponed. */
+        Assert(postponed_qual_list == NIL);
+
+        /*
+         * sublink_node is a plain struct, not a Node, so it must be removed
+         * by pointer identity.  list_delete() would compare the entries with
+         * equal(), which is only defined for Node trees.
+         */
+        root->unexpanded_sublink_expr_list =
+            list_delete_ptr(root->unexpanded_sublink_expr_list, sublink_info);
+    }
+
+    return qual;
+}
+
+/*
+ * search_sublink_from_lazy_process_list
+ *    Look up the saved entry whose expression equal()s 'node' in
+ *    root->unexpanded_sublink_expr_list.
+ *
+ * Returns the first matching sublink_node (as void *), or NULL if there is
+ * none.
+ */
+static void *
+search_sublink_from_lazy_process_list(PlannerInfo *root, Node *node)
+{
+    ListCell   *cell;
+
+    foreach(cell, root->unexpanded_sublink_expr_list)
+    {
+        sublink_node *candidate = lfirst(cell);
+
+        Assert(candidate->expr);
+        if (equal(candidate->expr, node))
+            return (void *) candidate;
+    }
+
+    /* No entry was recorded for this expression. */
+    return NULL;
+}
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d16669..897903d7e05 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -49,7 +49,7 @@
static bool can_minmax_aggs(PlannerInfo *root, List **context);
static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first);
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink);
static void minmax_qp_callback(PlannerInfo *root, void *extra);
static Oid fetch_agg_sort_op(Oid aggfnoid);
@@ -70,7 +70,7 @@ static Oid fetch_agg_sort_op(Oid aggfnoid);
* root->agginfos, so preprocess_aggrefs() must have been called already, too.
*/
void
-preprocess_minmax_aggregates(PlannerInfo *root)
+preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink)
{
Query *parse = root->parse;
FromExpr *jtnode;
@@ -173,9 +173,9 @@ preprocess_minmax_aggregates(PlannerInfo *root)
* FIRST is more likely to be available if the operator is a
* reverse-sort operator, so try that first if reverse.
*/
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, lazy_process_sublink))
continue;
- if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
+ if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse, lazy_process_sublink))
continue;
/* No indexable path for this aggregate, so fail */
@@ -315,7 +315,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
*/
static bool
build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
- Oid eqop, Oid sortop, bool nulls_first)
+ Oid eqop, Oid sortop, bool nulls_first, bool lazy_process_sublink)
{
PlannerInfo *subroot;
Query *parse;
@@ -352,12 +352,23 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
/* append_rel_list might contain outer Vars? */
subroot->append_rel_list = copyObject(root->append_rel_list);
IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
- /* There shouldn't be any OJ info to translate, as yet */
- Assert(subroot->join_info_list == NIL);
- /* and we haven't made equivalence classes, either */
- Assert(subroot->eq_classes == NIL);
- /* and we haven't created PlaceHolderInfos, either */
- Assert(subroot->placeholder_list == NIL);
+
+ if (lazy_process_sublink)
+ {
+ /* under lazy process sublink, parent root may have some data that child does not need, so set it to NIL */
+ subroot->join_info_list = NIL;
+ subroot->eq_classes = NIL;
+ subroot->placeholder_list = NIL;
+ }
+ else
+ {
+ /* There shouldn't be any OJ info to translate, as yet */
+ Assert(subroot->join_info_list == NIL);
+ /* and we haven't made equivalence classes, either */
+ Assert(subroot->eq_classes == NIL);
+ /* and we haven't created PlaceHolderInfos, either */
+ Assert(subroot->placeholder_list == NIL);
+ }
/*----------
* Generate modified query of the form
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 273ac0acf7e..7042c96b09b 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -102,6 +102,8 @@ query_planner(PlannerInfo *root,
Assert(rte != NULL);
if (rte->rtekind == RTE_RESULT)
{
+ lazy_process_sublinks(root, true);
+
/* Make the RelOptInfo for it directly */
final_rel = build_simple_rel(root, varno, NULL);
@@ -197,6 +199,8 @@ query_planner(PlannerInfo *root,
*/
generate_base_implied_equalities(root);
+ lazy_process_sublinks(root, false);
+
/*
* We have completed merging equivalence sets, so it's now possible to
* generate pathkeys in canonical form; so compute query_pathkeys and
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bd01ec0526f..1924cbe0246 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -64,6 +64,7 @@
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
+#include "utils/guc.h"
/* GUC parameters */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
@@ -128,8 +129,9 @@ typedef struct
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
-static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
+static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop);
static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static Node *preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -641,6 +643,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
+ root->unexpanded_sublink_counter = 0;
+ root->unexpanded_sublink_expr_list = NIL;
/*
* If there is a WITH list, process each WITH query and either convert it
@@ -784,8 +788,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
* part of the targetlist.
*/
parse->targetList = (List *)
- preprocess_expression(root, (Node *) parse->targetList,
- EXPRKIND_TARGET);
+ preprocess_expression_ext(root, (Node *) parse->targetList,
+ EXPRKIND_TARGET, false);
/* Constant-folding might have removed all set-returning functions */
if (parse->hasTargetSRFs)
@@ -807,7 +811,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
- preprocess_qual_conditions(root, (Node *) parse->jointree);
+ preprocess_qual_conditions(root, (Node *) parse->jointree, true);
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
@@ -1049,14 +1053,24 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
return root;
}
+static Node *
+preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+{ /* Thin wrapper that keeps the three-argument form for callers that never defer SubLinks. */
+ return preprocess_expression_ext(root, expr, kind, true); /* process_sublink = true: do not defer SubLink expansion */
+}
+
/*
* preprocess_expression
* Do subquery_planner's preprocessing work for an expression,
* which can be a targetlist, a WHERE clause (including JOIN/ON
* conditions), a HAVING clause, or a few other things.
+ *
+ * If process_sublink is false, expansion of SubLinks found in the
+ * expression may be deferred instead of being done here;
+ * see lazy_process_sublinks().
*/
static Node *
-preprocess_expression(PlannerInfo *root, Node *expr, int kind)
+preprocess_expression_ext(PlannerInfo *root, Node *expr, int kind, bool process_sublink)
{
/*
* Fall out quickly if expression is empty. This occurs often enough to
@@ -1129,7 +1143,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
/* Expand SubLinks to SubPlans */
if (root->parse->hasSubLinks)
- expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
+ expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL), false, process_sublink);
/*
* XXX do not insert anything here unless you have grokked the comments in
@@ -1158,7 +1172,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
* preprocessing work on each qual condition found therein.
*/
static void
-preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
+preprocess_qual_conditions(PlannerInfo *root, Node *jtnode, bool istop)
{
if (jtnode == NULL)
return;
@@ -1172,17 +1186,21 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
ListCell *l;
foreach(l, f->fromlist)
- preprocess_qual_conditions(root, lfirst(l));
+ preprocess_qual_conditions(root, lfirst(l), false);
- f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
+ /*
+ * istop = true means these quals come from the query's WHERE clause;
+ * istop = false means they are join quals from a JOIN ... ON clause.
+ * For now, only SubLinks in the WHERE clause can be deferred.
+ */
+ f->quals = preprocess_expression_ext(root, f->quals, EXPRKIND_QUAL, !istop);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- preprocess_qual_conditions(root, j->larg);
- preprocess_qual_conditions(root, j->rarg);
-
+ preprocess_qual_conditions(root, j->larg, false);
+ preprocess_qual_conditions(root, j->rarg, false);
j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
}
else
@@ -1384,11 +1402,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* pathtargets, else some copies of the Aggref nodes might escape
* being marked.
*/
- if (parse->hasAggs)
- {
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
preprocess_aggrefs(root, (Node *) root->processed_tlist);
+
+ if (parse->hasAggs)
preprocess_aggrefs(root, (Node *) parse->havingQual);
- }
/*
* Locate any window functions in the tlist. (We don't need to look
@@ -1412,8 +1430,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* that is needed in MIN/MAX-optimizable cases will have to be
* duplicated in planagg.c.
*/
- if (parse->hasAggs)
- preprocess_minmax_aggregates(root);
+ if (parse->hasAggs && !has_unexpanded_sublink(root))
+ preprocess_minmax_aggregates(root, false);
/*
* Figure out whether there's a hard limit on the number of rows that
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d102..edd407740ea 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,11 +32,13 @@
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
+#include "optimizer/paths.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#include "utils/ruleutils.h"
typedef struct convert_testexpr_context
@@ -49,6 +51,8 @@ typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
+ bool lazy_process;
+ bool force_process;
} process_sublinks_context;
typedef struct finalize_primnode_context
@@ -65,6 +69,13 @@ typedef struct inline_cte_walker_context
Query *ctequery; /* query to substitute */
} inline_cte_walker_context;
+typedef struct equal_expr_info_context
+{
+ bool has_unexpected_expr; /* set on a Param, a FuncExpr, or a second Var at the same query level */
+ bool has_const; /* set when any Const node was seen in the clause */
+ Var *outer_var; /* copy of the first Var with varlevelsup > 0, or NULL */
+ Var *inner_var; /* copy of the first Var with varlevelsup == 0, or NULL */
+} equal_expr_info_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
List *plan_params,
@@ -105,6 +116,11 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
+static Node *replace_vars_mutator(Node *node, void *context);
+static List *find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery);
+static bool equal_expr_analyze_walker(Node *node, void *context);
+static bool equal_expr_safety_check(Node *node, equal_expr_info_context *context);
+
/*
* Get the datatype/typmod/collation of the first column of the plan's output.
@@ -162,7 +178,7 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod,
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery,
SubLinkType subLinkType, int subLinkId,
- Node *testexpr, bool isTopQual)
+ Node *testexpr, bool isTopQual, bool lazy_process)
{
Query *subquery;
bool simple_exists = false;
@@ -173,6 +189,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
Plan *plan;
List *plan_params;
Node *result;
+ Query *optimized_subquery = NULL;
+ Query *optimized_subquery_copy = NULL;
/*
* Copy the source Query node. This is a quick and dirty kluge to resolve
@@ -218,8 +236,32 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* plan_params should not be in use in current query level */
Assert(root->plan_params == NIL);
+ if (lazy_process)
+ {
+ List *conditions = NIL;
+ Query *subquery_copy = copyObject(orig_subquery);
+
+ /*
+ * Scan the sublink query's WHERE clause for equalities of the form
+ * "outer var = inner var". If any exist, restrictions that the outer
+ * query places on the outer var may be pushed down to optimize the subquery.
+ */
+ conditions = find_equal_conditions_contain_uplevelvar_in_sublink_query(subquery_copy);
+ if (conditions)
+ {
+ /* Search outer queries, and if relevant equivalent expressions are found, push them down into subqueries. */
+ if (try_push_outer_qual_to_sublink_query(root, subquery_copy, conditions))
+ {
+ optimized_subquery = subquery_copy;
+ optimized_subquery_copy = copyObject(optimized_subquery);
+ }
+ list_free(conditions);
+ }
+ }
+
/* Generate Paths for the subquery */
- subroot = subquery_planner(root->glob, subquery,
+ subroot = subquery_planner(root->glob,
+ (optimized_subquery != NULL) ? optimized_subquery : subquery,
root,
false, tuple_fraction);
@@ -256,7 +298,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
List *paramIds;
/* Make a second copy of the original subquery */
- subquery = copyObject(orig_subquery);
+ subquery = copyObject((optimized_subquery_copy != NULL) ? optimized_subquery_copy : orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
@@ -365,7 +407,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
*/
if (IsA(arg, PlaceHolderVar) ||
IsA(arg, Aggref))
- arg = SS_process_sublinks(root, arg, false);
+ arg = SS_process_sublinks(root, arg, false, false, true);
splan->parParam = lappend_int(splan->parParam, pitem->paramId);
splan->args = lappend(splan->args, arg);
@@ -1915,12 +1957,14 @@ replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
-SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
+SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
+ context.lazy_process = lazy_process;
+ context.force_process = force_process;
return process_sublinks_mutator(expr, &context);
}
@@ -1930,20 +1974,34 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
process_sublinks_context locContext;
locContext.root = context->root;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
if (node == NULL)
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
- Node *testexpr;
/*
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
- testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
+ sublink->testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
+
+ if (!context->force_process &&
+ query_has_sublink_try_pushdown_qual(context->root))
+ {
+ Assert(context->lazy_process == false);
+ context->root->unexpanded_sublink_counter++;
+ return node;
+ }
+
+ if (context->lazy_process)
+ context->root->unexpanded_sublink_counter--;
/*
* Now build the SubPlan node and make the expr to return.
@@ -1952,8 +2010,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
(Query *) sublink->subselect,
sublink->subLinkType,
sublink->subLinkId,
- testexpr,
- context->isTopQual);
+ sublink->testexpr,
+ context->isTopQual, locContext.lazy_process);
}
/*
@@ -1978,8 +2036,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
- Assert(!IsA(node, SubPlan));
- Assert(!IsA(node, AlternativeSubPlan));
+ Assert(!IsA(node, SubPlan) || context->lazy_process);
+ Assert(!IsA(node, AlternativeSubPlan) || context->lazy_process);
Assert(!IsA(node, Query));
/*
@@ -2003,6 +2061,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2024,6 +2084,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context)
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
+ locContext.lazy_process = context->lazy_process;
+ locContext.force_process = context->force_process;
foreach(l, ((BoolExpr *) node)->args)
{
@@ -2989,3 +3051,184 @@ SS_make_initplan_from_plan(PlannerInfo *root,
/* Set costs of SubPlan using info from the plan tree */
cost_subplan(subroot, node, plan);
}
+
+/*
+ * sublink_query_push_qual
+ *    AND a rewritten copy of 'qual' onto the WHERE clause of 'subquery'.
+ *
+ * The copy is produced by walking 'qual' with replace_vars_mutator(), which
+ * substitutes a copy of 'inner' for every Var equal to 'outer'.  The
+ * result is combined with subquery->jointree->quals via make_and_qual().
+ */
+void
+sublink_query_push_qual(Query *subquery, Node *qual, Var *outer, Var *inner)
+{
+    pushdown_expr_info subst;
+    Node       *rewritten;
+
+    /* Describe the Var substitution the mutator has to perform */
+    subst.inner = inner;
+    subst.outer = outer;
+
+    rewritten = expression_tree_mutator(qual, replace_vars_mutator, (void *) &subst);
+
+    /* Attach the translated clause to the subquery's existing quals */
+    subquery->jointree->quals = make_and_qual(subquery->jointree->quals, rewritten);
+}
+
+/*
+ * replace_vars_mutator
+ *    Tree mutator used by sublink_query_push_qual().
+ *
+ * 'context' points to a pushdown_expr_info.  Every Var node that is equal()
+ * to info->outer is replaced by a fresh copy of info->inner; all other
+ * nodes are copied recursively through expression_tree_mutator().
+ */
+static Node *
+replace_vars_mutator(Node *node, void *context)
+{
+    pushdown_expr_info *info = (pushdown_expr_info *) context;
+
+    /*
+     * expression_tree_mutator() invokes the mutator on child links that may
+     * be NULL, and IsA() dereferences its argument, so bail out first.
+     */
+    if (node == NULL)
+        return NULL;
+
+    if (IsA(node, Var) && equal(node, (Node *) info->outer))
+        return copyObject((Node *) info->inner);
+
+    return expression_tree_mutator(node, replace_vars_mutator, context);
+}
+
+/*
+ * condition_is_safe_pushdown_to_sublink
+ *    Decide whether the restriction in 'rinfo' may be copied into a sublink
+ *    query on behalf of column 'var'.
+ *
+ * The clause qualifies only if it is not pseudoconstant, passes
+ * contain_leaked_vars() and equal_expr_safety_check(), and has the shape
+ * "var = const": exactly one same-level Var, no upper-level Var, a Const,
+ * and nothing flagged as unexpected.  The Var must be a user column
+ * (varattno > 0; system columns are not supported for now) and must be
+ * equal() to 'var'.
+ */
+bool
+condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var)
+{
+    equal_expr_info_context info;
+    Node       *expr = (Node *) rinfo->clause;
+
+    /* Reject empty, pseudoconstant, and leaky clauses up front */
+    if (expr == NULL || rinfo->pseudoconstant || contain_leaked_vars(expr))
+        return false;
+
+    memset(&info, 0, sizeof(info));
+    if (!equal_expr_safety_check(expr, &info))
+        return false;
+
+    /* Need one same-level Var and no reference to an upper query level */
+    if (info.inner_var == NULL || info.outer_var != NULL)
+        return false;
+
+    /* Need a constant on the other side and nothing else in the clause */
+    if (info.has_unexpected_expr || !info.has_const)
+        return false;
+
+    /* System columns are not supported for now */
+    if (info.inner_var->varattno <= 0)
+        return false;
+
+    /* Finally, the clause must restrict exactly the column asked about */
+    return equal(info.inner_var, var);
+}
+
+/*
+ * find_equal_conditions_contain_uplevelvar_in_sublink_query
+ *    Collect the "outer var = inner var" equalities in a sublink query's
+ *    WHERE clause.
+ *
+ * A copy of the WHERE clause is canonicalized and flattened into an
+ * implicit-AND list.  For every conjunct that passes
+ * equal_expr_safety_check() and references exactly one upper-level Var and
+ * one same-level Var (no Const, nothing unexpected), a palloc'd
+ * pushdown_expr_info holding both Vars is appended to the result.
+ * Returns NIL when the query has no jointree or no quals.
+ */
+static List *
+find_equal_conditions_contain_uplevelvar_in_sublink_query(Query *orig_subquery)
+{
+    List       *result = NIL;
+    List       *conjuncts;
+    Node       *where_clause;
+    ListCell   *cell;
+
+    if (orig_subquery->jointree == NULL ||
+        orig_subquery->jointree->quals == NULL)
+        return NIL;
+
+    /* Work on a private copy so the query itself is left alone */
+    where_clause = copyObject(orig_subquery->jointree->quals);
+    where_clause = (Node *) canonicalize_qual((Expr *) where_clause, false);
+    conjuncts = make_ands_implicit((Expr *) where_clause);
+
+    foreach(cell, conjuncts)
+    {
+        equal_expr_info_context info;
+        pushdown_expr_info *pair;
+
+        memset(&info, 0, sizeof(info));
+        if (!equal_expr_safety_check((Node *) lfirst(cell), &info))
+            continue;
+
+        /* It needs to be something like outer var = inner var */
+        if (info.inner_var == NULL || info.outer_var == NULL)
+            continue;
+        if (info.has_unexpected_expr || info.has_const)
+            continue;
+
+        pair = palloc0(sizeof(pushdown_expr_info));
+        pair->inner = info.inner_var;
+        pair->outer = info.outer_var;
+        result = lappend(result, pair);
+    }
+
+    return result;
+}
+
+/*
+ * equal_expr_safety_check
+ *    Test whether 'node' is a simple "operand = operand" clause and, if so,
+ *    record what its operands are in *context.
+ *
+ * Returns false, leaving *context untouched, unless node is an OpExpr whose
+ * operator name is "=", whose operands are bare Var or Const nodes, and
+ * which contains no volatile, mutable, or non-strict functions.  Otherwise
+ * the clause is scanned with equal_expr_analyze_walker() and true is
+ * returned; callers must still inspect *context to see which combination
+ * of inner Var, outer Var, and Const was found.
+ *
+ * Operands are required to be plain Var/Const nodes because callers treat
+ * the Vars recorded in *context as being directly equated.  A clause such
+ * as "(-x.a) = y.a" would otherwise be reported as the pair x.a / y.a, and
+ * pushing a restriction on y.a down as a restriction on x.a would then
+ * change the query's result.
+ *
+ * NOTE(review): the operator is matched by name only; confirm whether it
+ * should also be verified to be a genuine equality operator for the
+ * operand types (a user-defined operator may also be named "=").
+ */
+static bool
+equal_expr_safety_check(Node *node, equal_expr_info_context *context)
+{
+    const char *op;
+    ListCell   *lc;
+
+    if (node == NULL || !IsA(node, OpExpr))
+        return false;
+
+    op = get_simple_binary_op_name((OpExpr *) node);
+    if (op == NULL || strcmp(op, "=") != 0)
+        return false;
+
+    /* Both sides must be a bare column reference or a constant */
+    foreach(lc, ((OpExpr *) node)->args)
+    {
+        Node       *arg = (Node *) lfirst(lc);
+
+        if (arg == NULL || (!IsA(arg, Var) && !IsA(arg, Const)))
+            return false;
+    }
+
+    if (contain_volatile_functions(node) ||
+        contain_mutable_functions(node) ||
+        contain_nonstrict_functions(node))
+        return false;
+
+    /* Classify the operands; the walker's own result is not needed here */
+    equal_expr_analyze_walker(node, context);
+
+    return true;
+}
+
+/*
+ * equal_expr_analyze_walker
+ *    Tree walker that classifies the leaves of a clause into an
+ *    equal_expr_info_context (passed as 'context').
+ *
+ * - Var: a copy of the first Var seen at each level is remembered, in
+ *   outer_var when varlevelsup > 0 and in inner_var otherwise.  A second
+ *   Var at the same level sets has_unexpected_expr.
+ * - Const: sets has_const.
+ * - Param / FuncExpr: sets has_unexpected_expr and aborts the walk.
+ * - Anything else is descended into with expression_tree_walker().
+ *
+ * Returns true to stop the walk once something unexpected was found.
+ */
+static bool
+equal_expr_analyze_walker(Node *node, void *context)
+{
+    equal_expr_info_context *state = (equal_expr_info_context *) context;
+
+    if (node == NULL)
+        return false;
+
+    if (IsA(node, Var))
+    {
+        /* Pick the slot that matches the query level of this Var */
+        Var       **slot = (((Var *) node)->varlevelsup > 0) ?
+            &state->outer_var : &state->inner_var;
+
+        if (*slot != NULL)
+            state->has_unexpected_expr = true;  /* second Var at this level */
+        else
+            *slot = (Var *) copyObject(node);
+
+        return state->has_unexpected_expr;
+    }
+
+    if (IsA(node, Const))
+    {
+        state->has_const = true;
+        return false;
+    }
+
+    if (IsA(node, Param) || IsA(node, FuncExpr))
+    {
+        state->has_unexpected_expr = true;
+        return true;
+    }
+
+    return expression_tree_walker(node, equal_expr_analyze_walker, context);
+}
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 1c4202d864c..0e11ed22522 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -22,6 +22,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "utils/lsyscache.h"
+#include "rewrite/rewriteManip.h"
/* Local functions */
static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
@@ -87,6 +88,10 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
if (!create_new_ph)
elog(ERROR, "too late to create a new PlaceHolderInfo");
+ /* An unexpanded SubLink is not accepted here; it must go through SS_process_sublinks first. */
+ if (checkExprHasSubLink((Node *)phv))
+ elog(ERROR, "can not add sublink to placeholder_list");
+
phinfo = makeNode(PlaceHolderInfo);
phinfo->phid = phv->phid;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 8da525c715b..f847e898e68 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -429,7 +429,6 @@ static void resolve_special_varno(Node *node, deparse_context *context,
static Node *find_param_referent(Param *param, deparse_context *context,
deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
static void get_parameter(Param *param, deparse_context *context);
-static const char *get_simple_binary_op_name(OpExpr *expr);
static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
static void appendContextKeyword(deparse_context *context, const char *str,
int indentBefore, int indentAfter, int indentPlus);
@@ -7983,7 +7982,7 @@ get_parameter(Param *param, deparse_context *context)
* helper function for isSimpleNode
* will return single char binary operator name, or NULL if it's not
*/
-static const char *
+const char *
get_simple_binary_op_name(OpExpr *expr)
{
List *args = expr->args;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 7b030463013..cd4f6fd51b4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -684,6 +684,7 @@ static char *recovery_target_lsn_string;
/* should be static, but commands/variable.c needs to get at this */
char *role_string;
+bool lazy_process_sublink = true;
/*
* Displayable names for context types (enum GucContext)
@@ -973,6 +974,17 @@ static const unit_conversion time_unit_conversion_table[] =
static struct config_bool ConfigureNamesBool[] =
{
+ {
+ {"lazy_process_sublink", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable lazy process sublink."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &lazy_process_sublink,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 324d92880b5..fd8f6c995d6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -377,6 +377,9 @@ struct PlannerInfo
/* Does this query modify any partition key columns? */
bool partColsUpdated;
+
+ int unexpanded_sublink_counter;
+ List *unexpanded_sublink_expr_list;
};
@@ -995,6 +998,7 @@ typedef struct EquivalenceClass
bool ec_has_volatile; /* the (sole) member is a volatile expr */
bool ec_below_outer_join; /* equivalence applies below an OJ */
bool ec_broken; /* failed to generate needed clauses? */
+ bool ec_processed;
Index ec_sortref; /* originating sortclause label, or 0 */
Index ec_min_security; /* minimum security_level in ec_sources */
Index ec_max_security; /* maximum security_level in ec_sources */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c2..425b5c68131 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -25,6 +25,12 @@ extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
+typedef struct pushdown_expr_info
+{
+ Var *outer; /* Var to look for; in sublink conditions, the upper-level side of an equality */
+ Var *inner; /* Var substituted for 'outer'; in sublink conditions, the same-level side */
+} pushdown_expr_info;
+
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -62,7 +68,7 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
extern void generate_partitionwise_join_paths(PlannerInfo *root,
RelOptInfo *rel);
-
+extern bool try_push_outer_qual_to_sublink_query(PlannerInfo *parent, Query *subquery, List *conditions);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
#endif
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index bf1adfc52ac..784dfbfc42e 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -28,12 +28,13 @@ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
- query_pathkeys_callback qp_callback, void *qp_extra);
+ query_pathkeys_callback qp_callback,
+ void *qp_extra);
/*
* prototypes for plan/planagg.c
*/
-extern void preprocess_minmax_aggregates(PlannerInfo *root);
+extern void preprocess_minmax_aggregates(PlannerInfo *root, bool lazy_process_sublink);
/*
* prototypes for plan/createplan.c
@@ -67,6 +68,8 @@ extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
extern int from_collapse_limit;
extern int join_collapse_limit;
+#define has_unexpanded_sublink(root) ((root)->unexpanded_sublink_counter != 0)
+
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
@@ -96,6 +99,9 @@ extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Relids nullable_relids,
Index security_level);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
+extern void lazy_process_sublinks(PlannerInfo *root, bool single_result_rte);
+extern bool query_has_sublink_try_pushdown_qual(PlannerInfo *root);
+extern Node *lazy_process_sublink_qual(PlannerInfo *root, Node *node);
/*
* prototypes for plan/analyzejoins.c
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 059bdf941ef..396c4c6117e 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -25,7 +25,7 @@ extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
bool under_not,
Relids available_rels);
extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
-extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
+extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual, bool lazy_process, bool force_process);
extern void SS_identify_outer_params(PlannerInfo *root);
extern void SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel);
extern void SS_attach_initplans(PlannerInfo *root, Plan *plan);
@@ -36,5 +36,7 @@ extern Param *SS_make_initplan_output_param(PlannerInfo *root,
extern void SS_make_initplan_from_plan(PlannerInfo *root,
PlannerInfo *subroot, Plan *plan,
Param *prm);
+extern bool condition_is_safe_pushdown_to_sublink(RestrictInfo *rinfo, Var *var);
+extern void sublink_query_push_qual(Query *subquery, Node *qual, Var *var, Var *replace);
#endif /* SUBSELECT_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index aa18d304ac0..92bfc1b806e 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -288,6 +288,8 @@ extern int tcp_user_timeout;
extern bool trace_sort;
#endif
+extern bool lazy_process_sublink;
+
/*
* Functions exported by guc.c
*/
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index d333e5e8a56..d4ccca3fe3c 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -42,5 +42,6 @@ extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
extern char *pg_get_statisticsobjdef_string(Oid statextid);
+extern const char *get_simple_binary_op_name(OpExpr *expr);
#endif /* RULEUTILS_H */
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 3a91c144a27..232ee6d15a1 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -926,9 +926,9 @@ WHERE
-> Result
Output: (hjtest_1.b * 5)
-> Hash
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
@@ -974,7 +974,7 @@ WHERE
Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2)))
Join Filter: (hjtest_1.a <> hjtest_2.b)
-> Seq Scan on public.hjtest_2
- Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b
+ Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.b, hjtest_2.id, hjtest_2.c
Filter: ((SubPlan 5) < 55)
SubPlan 5
-> Result
diff --git a/src/test/regress/expected/qual_pushdown_to_sublink.out b/src/test/regress/expected/qual_pushdown_to_sublink.out
new file mode 100644
index 00000000000..b4d68ac3a12
--- /dev/null
+++ b/src/test/regress/expected/qual_pushdown_to_sublink.out
@@ -0,0 +1,221 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+show lazy_process_sublink;
+ lazy_process_sublink
+----------------------
+ on
+(1 row)
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, (SubPlan 1)
+ Filter: ((y.a = 1) AND (y.b = 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+ a | b
+---+---
+ 1 | 1
+(1 row)
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a
+ Filter: ((y.a = 1) AND (y.b = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: x.b
+ One-Time Filter: ((y.b = 1) AND (y.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x
+ Output: x.b
+ Filter: ((x.b = 1) AND (x.a = 1))
+(10 rows)
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+ a
+---
+ 1
+(1 row)
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b, (SubPlan 2)
+ Filter: ((a.a = 1) AND (a.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: max(b.a)
+ -> Result
+ Output: b.a
+ One-Time Filter: ((a.b = 1) AND (a.a = 1))
+ -> Nested Loop
+ Output: b.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Result
+ Output: a1.a
+ One-Time Filter: ((b.b = 1) AND (a.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a
+ Filter: ((a1.b = 1) AND (a1.a = 1) AND (clock_timestamp() > 'Fri Dec 11 00:00:00 2020 PST'::timestamp with time zone))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+(24 rows)
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Nested Loop Semi Join
+ Output: a.a, a.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a
+ Output: a.a, a.b
+ Filter: ((a.a = 1) AND (a.b = 1))
+ -> Nested Loop Semi Join
+ Output: b.a, b.b
+ -> Nested Loop
+ Output: b.a, b.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 b
+ Output: b.a, b.b
+ Filter: ((b.b = 1) AND (b.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 c
+ Output: c.a, c.b
+ Filter: ((c.b = 1) AND (c.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 a1
+ Output: a1.a, a1.b
+ Filter: ((a1.b = 1) AND (a1.a = 1))
+(18 rows)
+
+--5 aggrefs with multiple agglevelsup
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT
+ (SELECT
+ (SELECT sum(foo.a + bar.b) FROM ab jazz WHERE jazz.a=foo.a AND jazz.b=foo.b)
+ FROM ab bar WHERE bar.a=foo.a AND bar.b=foo.b
+ ) FROM ab foo WHERE foo.a=1 AND foo.b=1 GROUP BY a, b;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Group
+ Output: (SubPlan 2), foo.a, foo.b
+ Group Key: foo.a, foo.b
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 foo
+ Output: foo.a, foo.b
+ Filter: ((foo.a = 1) AND (foo.b = 1))
+ SubPlan 2
+ -> Aggregate
+ Output: (SubPlan 1)
+ -> Result
+ Output: bar.b
+ One-Time Filter: ((foo.b = 1) AND (foo.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 bar
+ Output: bar.b
+ Filter: ((bar.b = 1) AND (bar.a = 1))
+ SubPlan 1
+ -> Result
+ Output: sum((foo.a + bar.b))
+ One-Time Filter: ((foo.b = 1) AND (foo.a = 1))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 jazz
+ Filter: ((jazz.b = 1) AND (jazz.a = 1))
+(21 rows)
+
+--6 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Nested Loop
+ Output: y.a
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 y
+ Output: y.a, y.b
+ Filter: ((y.b = 1) AND (y.a = 1) AND (SubPlan 1))
+ SubPlan 1
+ -> Aggregate
+ Output: count(*)
+ -> Append
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 x_1
+ Filter: ((x_1.a = y.a) AND (x_1.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b2 x_2
+ Filter: ((x_2.a = y.a) AND (x_2.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b3 x_3
+ Filter: ((x_3.a = y.a) AND (x_3.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b1 x_4
+ Filter: ((x_4.a = y.a) AND (x_4.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b2 x_5
+ Filter: ((x_5.a = y.a) AND (x_5.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a2_b3 x_6
+ Filter: ((x_6.a = y.a) AND (x_6.b = y.b))
+ -> Seq Scan on test_push_qual_to_sublink.ab_a1_b1 z
+ Output: z.a, z.b
+ Filter: ((z.b = 1) AND (z.a = 1))
+(24 rows)
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
+NOTICE: drop cascades to table ab
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 4e8ddc70613..2df4d6e15b5 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1063,7 +1063,7 @@ where o.ten = 0;
SubPlan 1
-> Seq Scan on public.int4_tbl
Output: int4_tbl.f1
- Filter: (int4_tbl.f1 <= $0)
+ Filter: (int4_tbl.f1 <= $1)
(14 rows)
select sum(ss.tst::int) from
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 5b0c73d7e37..3060801f2f1 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -134,3 +134,4 @@ test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats
+test: qual_pushdown_to_sublink
diff --git a/src/test/regress/sql/qual_pushdown_to_sublink.sql b/src/test/regress/sql/qual_pushdown_to_sublink.sql
new file mode 100644
index 00000000000..375e9aef91b
--- /dev/null
+++ b/src/test/regress/sql/qual_pushdown_to_sublink.sql
@@ -0,0 +1,86 @@
+CREATE SCHEMA IF NOT EXISTS test_push_qual_to_sublink;
+SET search_path=test_push_qual_to_sublink,sys;
+
+show lazy_process_sublink;
+
+create table ab (a int not null, b int not null) partition by list (a);
+create table ab_a2 partition of ab for values in(2) partition by list (b);
+create table ab_a2_b1 partition of ab_a2 for values in (1);
+create table ab_a2_b2 partition of ab_a2 for values in (2);
+create table ab_a2_b3 partition of ab_a2 for values in (3);
+create table ab_a1 partition of ab for values in(1) partition by list (b);
+create table ab_a1_b1 partition of ab_a1 for values in (1);
+create table ab_a1_b2 partition of ab_a1 for values in (2);
+create table ab_a1_b3 partition of ab_a1 for values in (3);
+
+INSERT INTO ab VALUES (1,1);
+INSERT INTO ab VALUES (1,2);
+INSERT INTO ab VALUES (1,3);
+INSERT INTO ab VALUES (2,1);
+INSERT INTO ab VALUES (2,2);
+INSERT INTO ab VALUES (2,3);
+
+--1 sublink in select clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+SELECT
+y.a, (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b) AS b
+FROM ab y WHERE a = 1 AND b = 1;
+
+--2 sublink in where clause can do pushdown qual(a=1 and b=1)
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+SELECT y.a FROM ab y
+WHERE a = 1 AND b = 1 AND a in (SELECT x.b FROM ab x WHERE y.a=x.a AND y.b=x.b);
+
+--3 Nested sublink also supports pushdown qual
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b,
+(SELECT max(b.a) AS max
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE
+ a1.a = a.a AND --from uplevel 1
+ a1.b = b.b AND --frem uplevel 2
+ clock_timestamp() > '2020-12-11' --Keep sublink not eliminated
+ )
+ )
+) AS c
+FROM ab a WHERE a.a=1 AND a.b=1;
+
+--4 This feature does not conflict with pullUp sublink
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT a.a, a.b
+FROM ab a
+WHERE EXISTS (SELECT b.a
+ FROM ab b, ab c
+ WHERE b.a=c.a AND b.b=c.b AND b.a=a.a AND b.b=a.b AND
+ (EXISTS
+ (SELECT a1.a
+ FROM ab a1
+ WHERE a1.a = b.a AND a1.b = b.b
+ )
+ )
+) AND
+a.a=1 AND a.b=1;
+
+--5 aggrefs with multiple agglevelsup
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT
+ (SELECT
+ (SELECT sum(foo.a + bar.b) FROM ab jazz WHERE jazz.a=foo.a AND jazz.b=foo.b)
+ FROM ab bar WHERE bar.a=foo.a AND bar.b=foo.b
+ ) FROM ab foo WHERE foo.a=1 AND foo.b=1 GROUP BY a, b;
+
+--6 sublink in join on clause can not do pushdown
+EXPLAIN (VERBOSE, COSTS OFF) SELECT y.a
+FROM ab y JOIN ab z on ((y.a=z.a) AND (y.b=z.b) AND exists (SELECT count(*) FROM ab x WHERE x.a=y.a AND x.b=y.b))
+WHERE y.a = 1 AND y.b = 1;
+
+DROP SCHEMA test_push_qual_to_sublink CASCADE;
--
2.32.0 (Apple Git-132)