diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ed62e3a0fc..f5042b6952 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1376,6 +1376,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, int varno; Relids clause_varnos; Relids upper_varnos; + ListCell *lc; + List *clauses; + List *all_clauses = NIL; + Const *const_var; + bool first_elem; Assert(sublink->subLinkType == EXISTS_SUBLINK); @@ -1405,12 +1410,80 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (!simplify_EXISTS_query(root, subselect)) return NULL; + if (subselect->jointree->quals) + all_clauses = lappend(all_clauses, subselect->jointree->quals); + + subselect->jointree->quals = NULL; + + /* Gather all clauses in main list for the further consideration */ + all_clauses = list_concat(all_clauses, subselect->jointree->fromlist); + /* - * Separate out the WHERE clause. (We could theoretically also remove - * top-level plain JOIN/ON clauses, but it's probably not worth the - * trouble.) + * We will able to remove top-level plain JOIN/ON clauses if they are not outer join. */ - whereClause = subselect->jointree->quals; + clauses = NIL; + first_elem = true; + const_var = makeConst(BOOLOID, + -1, + InvalidOid, + sizeof(bool), + (Datum) 1, + false, + true); + foreach (lc, all_clauses) + { + Node *je = ((Node *) lfirst(lc)); + + whereClause = je; + if (IsA(je, RangeTblRef)) + goto end; + + if ((IsA(je, JoinExpr) && ((JoinExpr *)je)->jointype != JOIN_INNER)) + goto end; + + if (IsA(je, JoinExpr) && ((JoinExpr *)je)->quals != NULL) + whereClause = ((JoinExpr *)je)->quals; + + /* + * On the other hand, the WHERE clause must contain some Vars of the + * parent query, else it's not gonna be a join. + */ + if (!contain_vars_of_level(whereClause, 1)) + goto end; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + goto end; + + /* + * In case of a successful attempt, replaces it with the correct condition + */ + if (IsA(je, JoinExpr)) + ((JoinExpr *)je)->quals = (Node *) const_var; + + clauses = lappend(clauses, whereClause); + + first_elem = false; + subselect->jointree->fromlist = list_delete_ptr(subselect->jointree->fromlist, lc); + + end: + if (first_elem) + return NULL; + } + + list_free(all_clauses); + + /* We don't have any uses for pull-up creation */ + if (clauses == NIL) + return NULL; + else + /* We can easily combine clauses through AND operator because they are independent */ + whereClause = list_length(clauses) > 1 ? + (Node *) makeBoolExpr(AND_EXPR, clauses, -1) : + (Node *) linitial(clauses); + subselect->jointree->quals = NULL; /* diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index ebc545e246..ab0d716a70 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -812,6 +812,157 @@ where exists ( from text_tbl ) ss where road.name = ss.f1 ); rollback; +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tb.id); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=2 loops=1) + -> Seq Scan on ta (actual rows=2 loops=1) + -> Nested Loop (actual rows=1 loops=2) + -> Index Only Scan using tb_pkey on tb (actual rows=1 loops=2) + Index Cond: (id = ta.id) + Heap Fetches: 2 + -> Seq Scan on tc (actual rows=1 loops=2) +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=2 loops=1) + -> Seq Scan on ta (actual rows=2 loops=1) + -> Nested Loop (actual rows=1 loops=2) + -> Index Only Scan using tc_pkey on tc (actual rows=1 loops=2) + Index Cond: (id = ta.id) + Heap Fetches: 2 + -> Seq Scan on tb (actual rows=1 loops=2) +(7 rows) + +-- Join compound expression +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = tb.id); + QUERY PLAN +------------------------------------------------------------- + Hash Right Semi Join (actual rows=2 loops=1) + Hash Cond: (tc.id = ta.id) + -> Hash Join (actual rows=2 loops=1) + Hash Cond: (tb.id = tc.id) + -> Seq Scan on tb (actual rows=4 loops=1) + -> Hash (actual rows=2 loops=1) + Buckets: 4096 Batches: 1 Memory Usage: 33kB + -> Seq Scan on tc (actual rows=2 loops=1) + -> Hash (actual rows=2 loops=1) + Buckets: 4096 Batches: 1 Memory Usage: 33kB + -> Seq Scan on ta (actual rows=2 loops=1) +(11 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = tb.id); + QUERY PLAN +------------------------------------------------------------- + Hash Right Semi Join (actual rows=2 loops=1) + Hash Cond: (tc.id = ta.id) + -> Hash Join (actual rows=2 loops=1) + Hash Cond: (tb.id = tc.id) + -> Seq Scan on tb (actual rows=4 loops=1) + -> Hash (actual rows=2 loops=1) + Buckets: 4096 Batches: 1 Memory Usage: 33kB + -> Seq Scan on tc (actual rows=2 loops=1) + -> Hash (actual rows=2 loops=1) + Buckets: 4096 Batches: 1 Memory Usage: 33kB + -> Seq Scan on ta (actual rows=2 loops=1) +(11 rows) + +-- Compound expression with const type +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = 1); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=1 loops=1) + -> Index Only Scan using ta_pkey on ta (actual rows=1 loops=1) + Index Cond: (id = 1) + Heap Fetches: 1 + -> Nested Loop (actual rows=1 loops=1) + -> Index Only Scan using tc_pkey on tc (actual rows=1 loops=1) + Index Cond: (id = 1) + Heap Fetches: 1 + -> Seq Scan on tb (actual rows=1 loops=1) +(9 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON tb.id = 1 and + ta.id = 1); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=1 loops=1) + -> Index Only Scan using ta_pkey on ta (actual rows=1 loops=1) + Index Cond: (id = 1) + Heap Fetches: 1 + -> Nested Loop (actual rows=1 loops=1) + -> Index Only Scan using tb_pkey on tb (actual rows=1 loops=1) + Index Cond: (id = 1) + Heap Fetches: 1 + -> Seq Scan on tc (actual rows=1 loops=1) +(9 rows) + +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + RIGHT JOIN tc + ON ta.id = tc.id); + QUERY PLAN +------------------------------------------------------------ + Seq Scan on ta (actual rows=2 loops=1) + Filter: EXISTS(SubPlan 1) + SubPlan 1 + -> Nested Loop Left Join (actual rows=1 loops=2) + Join Filter: (ta.id = tc.id) + Rows Removed by Join Filter: 2 + -> Seq Scan on tc (actual rows=1 loops=2) + -> Materialize (actual rows=2 loops=2) + Storage: Memory Maximum Storage: 17kB + -> Seq Scan on tb (actual rows=4 loops=1) +(10 rows) + -- -- Test case for sublinks pushed down into subselects via join alias expansion -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 6ed3636a9e..bd842d627c 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -439,6 +439,71 @@ where exists ( rollback; +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tb.id); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id); + +-- Join compound expression +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = tb.id); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = tb.id); + +-- Compound expression with const type +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON ta.id = tc.id and + ta.id = 1); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + JOIN tc + ON tb.id = 1 and + ta.id = 1); +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) + SELECT 1 + FROM ta + WHERE EXISTS (SELECT 1 + FROM tb + RIGHT JOIN tc + ON ta.id = tc.id); + -- -- Test case for sublinks pushed down into subselects via join alias expansion --