diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index be92049..45b4104 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -27,6 +27,7 @@ #include "optimizer/prep.h" #include "optimizer/subselect.h" #include "optimizer/var.h" +#include "parser/parse_clause.h" #include "parser/parse_relation.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" @@ -1176,7 +1177,7 @@ SS_process_ctes(PlannerInfo *root) */ JoinExpr * convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels) + bool under_not, Relids available_rels) { JoinExpr *result; Query *parse = root->parse; @@ -1191,6 +1192,16 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, Assert(sublink->subLinkType == ANY_SUBLINK); /* + * For the case of NOT IN, we can only perform this optimization if we are + * guaranteed that we'll never get NULL values from the subquery results. + * For example "SELECT 1 WHERE 3 NOT IN(1,2,NULL)", the query will produce + * 0 rows due to the fact that 3 <> NULL is unknown. + */ + if (under_not && + !targetListIsGuaranteedNotToHaveNulls(subselect->targetList)) + return NULL; + + /* * The sub-select must not refer to any Vars of the parent query. (Vars of * higher levels should be okay, though.) */ @@ -1256,7 +1267,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, * And finally, build the JoinExpr node. */ result = makeNode(JoinExpr); - result->jointype = JOIN_SEMI; + result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; result->isNatural = false; result->larg = NULL; /* caller must fill this in */ result->rarg = (Node *) rtr; diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 776fe42..7ea165d 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -334,7 +334,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Is it a convertible ANY or EXISTS clause? */ if (sublink->subLinkType == ANY_SUBLINK) { - if ((j = convert_ANY_sublink_to_join(root, sublink, + if ((j = convert_ANY_sublink_to_join(root, sublink, false, available_rels1)) != NULL) { /* Yes; insert the new join node into the join tree */ @@ -360,7 +360,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, return NULL; } if (available_rels2 != NULL && - (j = convert_ANY_sublink_to_join(root, sublink, + (j = convert_ANY_sublink_to_join(root, sublink, false, available_rels2)) != NULL) { /* Yes; insert the new join node into the join tree */ @@ -452,7 +452,61 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, if (sublink && IsA(sublink, SubLink)) { - if (sublink->subLinkType == EXISTS_SUBLINK) + if (sublink->subLinkType == ANY_SUBLINK) + { + if ((j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels1)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels2)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL); + /* Return NULL representing constant TRUE */ + return NULL; + } + } + else if (sublink->subLinkType == EXISTS_SUBLINK) { if ((j = convert_EXISTS_sublink_to_join(root, sublink, true, available_rels1)) != NULL) @@ -506,6 +560,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, return NULL; } } + } /* Else return it unmodified */ return node; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index fcee137..062b6b1 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2406,6 +2406,61 @@ assignSortGroupRef(TargetEntry *tle, List *tlist) return tle->ressortgroupref; } + +/* + * targetListIsGuaranteedNotToHaveNulls + * true if the given target list only contains columns that have + * NOT NULL constraints to protect them from getting NULL values + * and Constants that are not null. + * + * Note: if this function returns false then it does not mean that the + * targetList will have NULLs, it just means we can't guarantee + * that it does not. + * + * Note: resjunk targets are ignored here. + */ +bool +targetListIsGuaranteedNotToHaveNulls(List *targetList) +{ + ListCell *tl; + + foreach(tl, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(tl); + + if (tle->resjunk) + continue; + + /* + * For Constant expressions we can tell directly if they're NULL or + * not. If we know the Const expression is not null then we can + * continue checking, but if we find the Const to be NULL then must + * return false + */ + if (IsA(tle->expr, Const)) + { + Const *expr = (Const *) tle->expr; + + if (expr->constisnull) + return false; + else + continue; /* It's okay */ + } + + /* + * We can't know if the target cannot contain NULL values if the + * target does not belong to a table. + */ + if (!OidIsValid(tle->resorigtbl)) + return false; + + + if (!get_attnotnull(tle->resorigtbl, tle->resorigcol)) + return false; + } + return true; +} + /* * targetIsInSortList * Is the given target item already in the sortlist? diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 4b5ef99..c98adc7 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -816,6 +816,37 @@ get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum) /* ---------- ATTRIBUTE CACHES ---------- */ /* + * get_attnotnull + * Returns true if pg_attribute.attnotnull is true, otherwise returns + * false. An error is raised if no record is found for the relid/attnum. + * + * Note: Calling functions should be careful and test relid for InvalidOid + * before calling this function. + */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result = att_tup->attnotnull; + ReleaseSysCache(tp); + return result; + } + else + { + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + return false; /* keep compiler quiet */ + } +} + +/* * get_attname * Given the relation id and the attribute number, * return the "attname" field from the attribute relation. diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index 5607e98..3e8bfe7 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -18,6 +18,7 @@ extern void SS_process_ctes(PlannerInfo *root); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, + bool under_not, Relids available_rels); extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index e9e7cdc..9aab0cb 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -47,5 +47,7 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); +extern bool targetListIsGuaranteedNotToHaveNulls(List *targetList); + #endif /* PARSE_CLAUSE_H */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index f46460a..5e7d946 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -63,6 +63,7 @@ extern List *get_op_btree_interpretation(Oid opno); extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_attname(Oid relid, AttrNumber attnum); extern char *get_relid_attribute_name(Oid relid, AttrNumber attnum); extern AttrNumber get_attnum(Oid relid, const char *attname); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index ce15af7..c97571b 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -739,3 +739,65 @@ select * from int4_tbl where 0 (1 row) +-- +-- Check NOT IN performs ANTI JOIN when subquery columns are NOT NULL +-- and does not when subquery columns can contain NULLs. +-- +BEGIN; +CREATE TEMP TABLE a (id INT PRIMARY KEY, b_id INT NULL); +CREATE TEMP TABLE b (id INT NULL); +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +ALTER TABLE b ALTER COLUMN id SET NOT NULL; +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id FROM b); + QUERY PLAN +------------------------------ + Hash Anti Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id+1 FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE (id,b_id) NOT IN (SELECT 1,id FROM b); + QUERY PLAN +------------------------------ + Hash Anti Join + Hash Cond: (a.b_id = b.id) + Join Filter: (a.id = 1) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE (id,b_id) NOT IN (SELECT NULL::int,id FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +ROLLBACK; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 326fd70..96b2c44 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -422,3 +422,32 @@ select * from int4_tbl where select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in (select ten from tenk1 b); + +-- +-- Check NOT IN performs ANTI JOIN when subquery columns are NOT NULL +-- and does not when subquery columns can contain NULLs. +-- + +BEGIN; + +CREATE TEMP TABLE a (id INT PRIMARY KEY, b_id INT NULL); +CREATE TEMP TABLE b (id INT NULL); + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id FROM b); + +ALTER TABLE b ALTER COLUMN id SET NOT NULL; + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id FROM b); + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE b_id NOT IN (SELECT id+1 FROM b); + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE (id,b_id) NOT IN (SELECT 1,id FROM b); + +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE (id,b_id) NOT IN (SELECT NULL::int,id FROM b); + +ROLLBACK;