Evaluate expression at planning time for two more cases

Started by Surafel Temesgenover 5 years ago14 messages

surafel3000@gmail.com

over 5 years ago

1 attachment(s)

Hi,

In good written query IS NULL and IS NOT NULL check on primary and non null
constraints columns should not happen but if it is mentioned PostgreSQL
have to be smart enough for not checking every return result about null
value on primary key column. Instead it can be evaluate its truth value and
set the result only once. The attached patch evaluate and set the truth
value for null and not null check on primary column on planning time if the
relation attribute is not mention on nullable side of outer join.

Thought?

regards

Surafel

Attachments:

null_check_on_pkey_optimization_v1.patchtext/x-patch; charset=US-ASCII; name=null_check_on_pkey_optimization_v1.patchDownload

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 750586fceb..417758f705 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -22,6 +22,7 @@
 #include "access/htup_details.h"
 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_constraint.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
@@ -42,6 +43,7 @@
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_func.h"
+#include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
 #include "tcop/tcopprot.h"
 #include "utils/acl.h"
@@ -90,6 +92,14 @@ typedef struct
 	char	   *prosrc;
 } inline_error_callback_arg;
 
+typedef struct check_null_side_state
+{
+	Relids		relids;			/* base relids within this subtree */
+	bool		contains_outer; /* does subtree contain outer join(s)? */
+	JoinType	jointype;		/* type of join */
+	List	   *sub_states;		/* List of states for subtree components */
+}			check_null_side_state;
+
 typedef struct
 {
 	char		max_hazard;		/* worst proparallel hazard found so far */
@@ -156,6 +166,9 @@ static Query *substitute_actual_srf_parameters(Query *expr,
 											   int nargs, List *args);
 static Node *substitute_actual_srf_parameters_mutator(Node *node,
 													  substitute_actual_srf_parameters_context *context);
+static bool check_null_side(PlannerInfo *root, int relid);
+static check_null_side_state * collect_jointree_data(Node *jtnode);
+
 
 
 /*****************************************************************************
@@ -2245,7 +2258,6 @@ rowtype_field_matches(Oid rowtypeid, int fieldnum,
 	return true;
 }
 
-
 /*--------------------
  * eval_const_expressions
  *
@@ -2626,6 +2638,7 @@ eval_const_expressions_mutator(Node *node,
 					{
 						has_null_input |= ((Const *) lfirst(arg))->constisnull;
 						all_null_input &= ((Const *) lfirst(arg))->constisnull;
+
 					}
 					else
 						has_nonconst_input = true;
@@ -3382,7 +3395,47 @@ eval_const_expressions_mutator(Node *node,
 
 					return makeBoolConst(result, false);
 				}
+				if (IsA(arg, Var) &&
+					((Var *) arg)->varlevelsup == 0 && context->root)
+				{
+					/*
+					 * Evaluate the test if it is on primary column and the
+					 * relation is not mentioned on nullable side of outer
+					 * join
+					 */
+					Var		   *var = (Var *) arg;
+					Query	   *parse = context->root->parse;
+					int			relid;
+					Bitmapset  *pkattnos;
+					Oid			constraintOid;
+					RangeTblEntry *rte;
 
+					relid = var->varno;
+					rte = rt_fetch(relid, parse->rtable);
+					pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
+
+					if (pkattnos != NULL && bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, pkattnos)
+						&& !check_null_side(context->root, relid))
+					{
+						bool		result;
+
+						switch (ntest->nulltesttype)
+						{
+							case IS_NULL:
+								result = false;
+								break;
+							case IS_NOT_NULL:
+								result = true;
+								break;
+							default:
+								elog(ERROR, "unrecognized nulltesttype: %d",
+									 (int) ntest->nulltesttype);
+								result = false; /* keep compiler quiet */
+								break;
+						}
+						return makeBoolConst(result, false);
+					}
+				}
 				newntest = makeNode(NullTest);
 				newntest->arg = (Expr *) arg;
 				newntest->nulltesttype = ntest->nulltesttype;
@@ -3572,6 +3625,118 @@ eval_const_expressions_mutator(Node *node,
 	return ece_generic_processing(node);
 }
 
+/*
+ * Check a relation attributes on nullable side of the outer join.
+ */
+static bool
+check_null_side(PlannerInfo *root, int relid)
+{
+	check_null_side_state *state;
+	ListCell   *l;
+
+	state = collect_jointree_data((Node *) root->parse->jointree);
+
+	/* if no outer joins the relation is not on nullable side */
+	if (state == NULL || !state->contains_outer)
+		return false;
+
+	/* scan the state and check relation on nullable outer join side */
+	foreach(l, state->sub_states)
+	{
+		check_null_side_state *sub_state = (check_null_side_state *) lfirst(l);
+
+		if (sub_state->contains_outer)
+		{
+			if (sub_state->jointype == JOIN_LEFT)
+			{
+				check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+				if (bms_is_member(relid, right_state->relids))
+					return true;
+			}
+			if (sub_state->jointype == JOIN_RIGHT)
+			{
+				check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+
+				if (bms_is_member(relid, left_state->relids))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * Collecting data about join. this function is similar to reduce_outer_joins_pass1
+ *
+ * Returns a state node describing the given jointree node.
+ */
+static check_null_side_state *
+collect_jointree_data(Node *jtnode)
+{
+	check_null_side_state *result;
+
+	result = (check_null_side_state *)
+		palloc(sizeof(check_null_side_state));
+	result->relids = NULL;
+	result->contains_outer = false;
+	result->sub_states = NIL;
+
+	if (jtnode == NULL)
+		return result;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		result->relids = bms_make_singleton(varno);
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		foreach(l, f->fromlist)
+		{
+			check_null_side_state *sub_state;
+
+			sub_state = collect_jointree_data(lfirst(l));
+			result->relids = bms_add_members(result->relids,
+											 sub_state->relids);
+			result->contains_outer |= sub_state->contains_outer;
+			result->sub_states = lappend(result->sub_states, sub_state);
+		}
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+		check_null_side_state *sub_state;
+
+		/* join's own RT index is not wanted in result->relids */
+		if (j->jointype == JOIN_RIGHT || j->jointype == JOIN_LEFT)
+			result->contains_outer = true;
+
+		sub_state = collect_jointree_data(j->larg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		sub_state->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+
+		sub_state = collect_jointree_data(j->rarg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		result->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+	return result;
+}
+
 /*
  * Subroutine for eval_const_expressions: check for non-Const nodes.
  *
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..a42f58c9f8 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4577,6 +4577,32 @@ SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0
   1 |     
 (1 row)
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                QUERY PLAN                
+------------------------------------------
+ Hash Left Join
+   Hash Cond: (b.a_id = a.id)
+   Filter: ((a.id IS NULL) OR (a.id > 0))
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(6 rows)
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Right Join
+   Hash Cond: (b.a_id = a.id)
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Bitmap Heap Scan on a
+               Recheck Cond: (id > 0)
+               ->  Bitmap Index Scan on a_pkey
+                     Index Cond: (id > 0)
+(8 rows)
+
 rollback;
 -- another join removal bug: this is not optimizable, either
 begin;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index c441049f41..1514b5a05a 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -903,6 +903,22 @@ select unique1, unique2 from onek2
        0 |     998
 (2 rows)
 
+CREATE TEMP TABLE a (id int PRIMARY KEY);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1403e0ffe7..4146562c3b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1589,6 +1589,11 @@ INSERT INTO b VALUES (0, 0), (1, NULL);
 SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 rollback;
 
 -- another join removal bug: this is not optimizable, either
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index b5929b2eca..6c4f4f1af8 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -234,6 +234,13 @@ select unique1, unique2 from onek2
 select unique1, unique2 from onek2
   where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
 
+CREATE TEMP TABLE a (id int PRIMARY KEY);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --

Ashutosh Bapat

ashutosh.bapat.oss@gmail.com

over 5 years ago

In reply to: Surafel Temesgen (#1)

Re: Evaluate expression at planning time for two more cases

Hi Surafel,

On Thu, Aug 27, 2020 at 6:01 PM Surafel Temesgen <surafel3000@gmail.com> wrote:

Hi,

In good written query IS NULL and IS NOT NULL check on primary and non null constraints columns should not happen but if it is mentioned PostgreSQL have to be smart enough for not checking every return result about null value on primary key column. Instead it can be evaluate its truth value and set the result only once. The attached patch evaluate and set the truth value for null and not null check on primary column on planning time if the relation attribute is not mention on nullable side of outer join.

Thought?

Thanks for the patch. Such SQL may arise from not-so-smart SQL
generation tools. It will be useful to have this optimization. Here
are some comments on your patch.

}
else
has_nonconst_input = true;
@@ -3382,7 +3395,47 @@ eval_const_expressions_mutator(Node *node,

+
+                    if (pkattnos != NULL &&
bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
pkattnos)
+                        && !check_null_side(context->root, relid))

Since this is working on parse->rtable this will work only for top level tables
as against the inherited tables or partitions which may have their own primary
key constraints if the parent doesn't have those.

This better be done when planning individual relations, plain or join or upper,
where all the necessary information is already available with each of the
relations and also the quals, derived as well as user specified, are
distributed to individual relations where they should be evalutated. My memory
is hazy but it might be possible do this while distributing the quals
themselves (distribute_qual_to_rels()).

Said that, to me, this looks more like something we should be able to do at the
time of constraint exclusion. But IIRC, we just prove whether constraints
refute a qual and not necessarily whether constraints imply a qual, making it
redundant, as is required here. E.g. primary key constraint implies key NOT
NULL rendering a "key IS NOT NULL" qual redundant. It might be better to test
the case when col IS NOT NULL is specified on a column which already has a NOT
NULL constraint. That may be another direction to take. We may require much
lesser code.

With either of these two approaches, the amount of code changes might
be justified.

+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL
OR a.id > 0);
+                  QUERY PLAN
+-----------------------------------------------
+ Hash Right Join
+   Hash Cond: (b.a_id = a.id)
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Bitmap Heap Scan on a
+               Recheck Cond: (id > 0)
+               ->  Bitmap Index Scan on a_pkey
+                     Index Cond: (id > 0)
+(8 rows)

Thanks for the tests.

Please add the patch to the next commitfest https://commitfest.postgresql.org/.

--
Best Wishes,
Ashutosh Bapat

Surafel Temesgen

surafel3000@gmail.com

over 5 years ago

In reply to: Ashutosh Bapat (#2)

Re: Evaluate expression at planning time for two more cases

Hi ,

Thank you for looking into this

On Fri, Aug 28, 2020 at 9:48 AM Ashutosh Bapat <ashutosh.bapat.oss@gmail.com>
wrote:

}
else
has_nonconst_input = true;
@@ -3382,7 +3395,47 @@ eval_const_expressions_mutator(Node *node,
+
+                    if (pkattnos != NULL &&
bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
pkattnos)
+                        && !check_null_side(context->root, relid))
Since this is working on parse->rtable this will work only for top level
tables
as against the inherited tables or partitions which may have their own
primary
key constraints if the parent doesn't have those.

In that case the table have to be specified in from clause otherwise its
error

e.g postgres=# CREATE TABLE cities (

name text,

population float,

altitude int

);

CREATE TABLE

postgres=# CREATE TABLE capitals (

id serial primary key,

state char(2)

) INHERITS (cities);

CREATE TABLE

postgres=# EXPLAIN SELECT * FROM cities WHERE id is not null;

ERROR: column "id" does not exist

LINE 1: EXPLAIN SELECT * FROM cities WHERE id is not null;

Even it will not work on the child table because the primary key constraint
on the parent table is not in-force in the child table.

This better be done when planning individual relations, plain or join or
upper,
where all the necessary information is already available with each of the
relations and also the quals, derived as well as user specified, are
distributed to individual relations where they should be evalutated. My
memory
is hazy but it might be possible do this while distributing the quals
themselves (distribute_qual_to_rels()).

The place where all the necessary information available is on
reduce_outer_joins as the comment of the function states but the downside
is its will only be inexpensive if the query contains outer join

Said that, to me, this looks more like something we should be able to do
at the
time of constraint exclusion. But IIRC, we just prove whether constraints
refute a qual and not necessarily whether constraints imply a qual, making
it
redundant, as is required here. E.g. primary key constraint implies key NOT
NULL rendering a "key IS NOT NULL" qual redundant. It might be better to
test
the case when col IS NOT NULL is specified on a column which already has a
NOT
NULL constraint. That may be another direction to take. We may require much
lesser code.

I don’t add NOT NULL constraint optimization to the patch because cached
plan is not invalidation in case of a change in NOT NULL constraint

Please add the patch to the next commitfest
https://commitfest.postgresql.org/.

I add it is here https://commitfest.postgresql.org/29/2699/
Thank you

regards
Surafel

Tom Lane

tgl@sss.pgh.pa.us

over 5 years ago

In reply to: Surafel Temesgen (#3)

Re: Evaluate expression at planning time for two more cases

Surafel Temesgen <surafel3000@gmail.com> writes:

[ null_check_on_pkey_optimization_v1.patch ]

I took a very brief look at this.

I don’t add NOT NULL constraint optimization to the patch because cached
plan is not invalidation in case of a change in NOT NULL constraint

That's actually not a problem, even though some people (including me)
have bandied about such suppositions in the past. Relying on attnotnull
in the planner is perfectly safe [1]/messages/by-id/23564.1585885251@sss.pgh.pa.us. Plus it'd likely be cheaper as
well as more general than looking up pkey information. If we did need
to explicitly record the plan's dependency on a constraint, this patch
would be wrong anyhow because it fails to make any such notation about
the pkey constraint it relied on.

The "check_null_side" code you're proposing seems really horrid.
For one thing, it seems quite out of place for eval_const_expressions
to be doing that. For another, it's wrong in principle because
eval_const_expressions doesn't know which part of the query tree
it's being invoked on, so it cannot know whether outer-join
nullability is an issue. For another, doing that work over again
from scratch every time we see a potentially optimizable NullTest
looks expensive. (I wonder whether you have tried to measure the
performance penalty imposed by this patch in cases where it fails
to make any proof.)

I've been doing some handwaving about changing the representation
of Vars, with an eye to making it clear by inspection whether a
given Var is nullable by some lower outer join [2]/messages/by-id/15848.1576515643@sss.pgh.pa.us. If that work
ever comes to fruition then the need for "check_null_side" would
go away. So maybe we should put this idea on the back burner
until that happens.

I'm not sure what I think about Ashutosh's ideas about doing this
somewhere else than eval_const_expressions. I do not buy the argument
that it's interesting to do this separately for each child partition.
Child partitions that have attnotnull constraints different from their
parent's are at best a tiny minority use-case, if indeed we allow them
at all (I tend to think we shouldn't). On the other hand it's possible
that postponing the check would allow bypassing the outer-join problem,
ie if we only do it for quals that have dropped down to the relation
scan level then we don't need to worry about outer join effects.

Another angle here is that eval_const_expressions runs before
reduce_outer_joins, meaning that if it's doing things that depend
on outer-join-ness then it will sometimes fail to optimize cases
that could be optimized. As a not incidental example, consider

select ... from t1 left join t2 on (...) where t2.x is not null;

reduce_outer_joins will realize that the left join can be reduced
to a plain join, whereupon (if t2.x is attnotnull) the WHERE clause
really is constant-true --- and this seems like a poster-child case
for it being useful to optimize away the WHERE clause. But
we won't be able to detect that if we apply the optimization during
eval_const_expressions. So maybe that's a good reason to do it
somewhere later.

regards, tom lane

[1]: /messages/by-id/23564.1585885251@sss.pgh.pa.us
[2]: /messages/by-id/15848.1576515643@sss.pgh.pa.us

Surafel Temesgen

surafel3000@gmail.com

over 5 years ago

In reply to: Tom Lane (#4)

Re: Evaluate expression at planning time for two more cases

Hi Tom

On Tue, Sep 8, 2020 at 4:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Surafel Temesgen <surafel3000@gmail.com> writes:

[ null_check_on_pkey_optimization_v1.patch ]

I took a very brief look at this.

I don’t add NOT NULL constraint optimization to the patch because cached
plan is not invalidation in case of a change in NOT NULL constraint

That's actually not a problem, even though some people (including me)
have bandied about such suppositions in the past. Relying on attnotnull
in the planner is perfectly safe [1]. Plus it'd likely be cheaper as
well as more general than looking up pkey information. If we did need
to explicitly record the plan's dependency on a constraint, this patch
would be wrong anyhow because it fails to make any such notation about
the pkey constraint it relied on.

ok thank you. I will change my next patch accordingly

The "check_null_side" code you're proposing seems really horrid.
For one thing, it seems quite out of place for eval_const_expressions
to be doing that. For another, it's wrong in principle because
eval_const_expressions doesn't know which part of the query tree
it's being invoked on, so it cannot know whether outer-join
nullability is an issue. For another, doing that work over again
from scratch every time we see a potentially optimizable NullTest
looks expensive. (I wonder whether you have tried to measure the
performance penalty imposed by this patch in cases where it fails
to make any proof.)

I was thinking about collecting data about joins only once at the start of
eval_const_expressions but I assume most queries don't have NULL check
expressions and postpone it until we find one. Thinking about it again I
think it can be done better by storing check_null_side_state into
eval_const_expressions_context to use it for subsequent evaluation.

I'm not sure what I think about Ashutosh's ideas about doing this

somewhere else than eval_const_expressions. I do not buy the argument
that it's interesting to do this separately for each child partition.
Child partitions that have attnotnull constraints different from their
parent's are at best a tiny minority use-case, if indeed we allow them
at all (I tend to think we shouldn't). On the other hand it's possible
that postponing the check would allow bypassing the outer-join problem,
ie if we only do it for quals that have dropped down to the relation
scan level then we don't need to worry about outer join effects.

At eval_const_expressions we check every expression and optimize it if
possible. Introducing other check and optimization mechanism to same other
place just for this optimization seems expensive with respect to
performance penalty to me

Another angle here is that eval_const_expressions runs before
reduce_outer_joins, meaning that if it's doing things that depend
on outer-join-ness then it will sometimes fail to optimize cases
that could be optimized. As a not incidental example, consider

select ... from t1 left join t2 on (...) where t2.x is not null;

reduce_outer_joins will realize that the left join can be reduced
to a plain join, whereupon (if t2.x is attnotnull) the WHERE clause
really is constant-true --- and this seems like a poster-child case
for it being useful to optimize away the WHERE clause. But
we won't be able to detect that if we apply the optimization during
eval_const_expressions. So maybe that's a good reason to do it
somewhere later.

In this case the expression not changed to constant-true because the
relation is on nullable side of outer join

regards
Surafel

Ashutosh Bapat

ashutosh.bapat@2ndquadrant.com

over 5 years ago

In reply to: Tom Lane (#4)

Re: Evaluate expression at planning time for two more cases

On Tue, 8 Sep 2020 at 07:16, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I'm not sure what I think about Ashutosh's ideas about doing this
somewhere else than eval_const_expressions. I do not buy the argument
that it's interesting to do this separately for each child partition.
Child partitions that have attnotnull constraints different from their
parent's are at best a tiny minority use-case, if indeed we allow them
at all (I tend to think we shouldn't).

I agree about partitions. But, IMO, a child having constraints different
from that of a parent is more common in inheritance trees.

Another point I raised in my mail was about constraint exclusion. Why
aren't these clauses constant-folded by constraint exclusion? Sorry, I
haven't looked at the constraint exclusion code myself for this.

As a not incidental example, consider

select ... from t1 left join t2 on (...) where t2.x is not null;

reduce_outer_joins will realize that the left join can be reduced
to a plain join, whereupon (if t2.x is attnotnull) the WHERE clause
really is constant-true --- and this seems like a poster-child case
for it being useful to optimize away the WHERE clause. But
we won't be able to detect that if we apply the optimization during
eval_const_expressions. So maybe that's a good reason to do it
somewhere later.

--
Best Wishes,
Ashutosh

Surafel Temesgen

surafel3000@gmail.com

over 5 years ago

In reply to: Surafel Temesgen (#5)

1 attachment(s)

Re: Evaluate expression at planning time for two more cases

On Tue, Sep 8, 2020 at 12:59 PM Surafel Temesgen <surafel3000@gmail.com>
wrote:

Hi Tom

On Tue, Sep 8, 2020 at 4:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

The "check_null_side" code you're proposing seems really horrid.
For one thing, it seems quite out of place for eval_const_expressions
to be doing that. For another, it's wrong in principle because
eval_const_expressions doesn't know which part of the query tree
it's being invoked on, so it cannot know whether outer-join
nullability is an issue. For another, doing that work over again
from scratch every time we see a potentially optimizable NullTest
looks expensive. (I wonder whether you have tried to measure the
performance penalty imposed by this patch in cases where it fails
to make any proof.)

I was thinking about collecting data about joins only once at the start of
eval_const_expressions but I assume most queries don't have NULL check
expressions and postpone it until we find one. Thinking about it again I
think it can be done better by storing check_null_side_state into
eval_const_expressions_context to use it for subsequent evaluation.

Attached patch does like the above and includes NOT NULL constraint column.

regards

Surafel

Attachments:

null_check_on_pkey_optimization_v2.patchtext/x-patch; charset=US-ASCII; name=null_check_on_pkey_optimization_v2.patchDownload

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 750586fceb..5fe4d88b5d 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -20,8 +20,10 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/table.h"
 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_constraint.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
@@ -39,6 +41,7 @@
 #include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "parser/analyze.h"
+#include "parser/parsetree.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_func.h"
@@ -50,6 +53,7 @@
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
@@ -61,6 +65,14 @@ typedef struct
 	AggClauseCosts *costs;
 } get_agg_clause_costs_context;
 
+typedef struct check_null_side_state
+{
+	Relids		relids;			/* base relids within this subtree */
+	bool		contains_outer; /* does subtree contain outer join(s)? */
+	JoinType	jointype;		/* type of join */
+	List	   *sub_states;		/* List of states for subtree components */
+}			check_null_side_state;
+
 typedef struct
 {
 	ParamListInfo boundParams;
@@ -68,6 +80,7 @@ typedef struct
 	List	   *active_fns;
 	Node	   *case_val;
 	bool		estimate;
+	check_null_side_state *state;
 } eval_const_expressions_context;
 
 typedef struct
@@ -156,6 +169,9 @@ static Query *substitute_actual_srf_parameters(Query *expr,
 											   int nargs, List *args);
 static Node *substitute_actual_srf_parameters_mutator(Node *node,
 													  substitute_actual_srf_parameters_context *context);
+static bool check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context);
+static check_null_side_state * collect_jointree_data(Node *jtnode);
+
 
 
 /*****************************************************************************
@@ -2296,6 +2312,7 @@ eval_const_expressions(PlannerInfo *root, Node *node)
 	context.active_fns = NIL;	/* nothing being recursively simplified */
 	context.case_val = NULL;	/* no CASE being examined */
 	context.estimate = false;	/* safe transformations only */
+	context.state= NULL;
 	return eval_const_expressions_mutator(node, &context);
 }
 
@@ -2626,6 +2643,7 @@ eval_const_expressions_mutator(Node *node,
 					{
 						has_null_input |= ((Const *) lfirst(arg))->constisnull;
 						all_null_input &= ((Const *) lfirst(arg))->constisnull;
+
 					}
 					else
 						has_nonconst_input = true;
@@ -3382,7 +3400,52 @@ eval_const_expressions_mutator(Node *node,
 
 					return makeBoolConst(result, false);
 				}
+				if (IsA(arg, Var) &&
+					((Var *) arg)->varlevelsup == 0 && context->root)
+				{
+					/*
+					 * Evaluate the test if it is on NOT NULL Constraint column and the
+					 * relation is not mentioned on nullable side of outer
+					 * join
+					 */
+					Var		   *var = (Var *) arg;
+					Query	   *parse = context->root->parse;
+					int			relid;
+					RangeTblEntry *rte;
 
+					relid = var->varno;
+					rte = rt_fetch(relid, parse->rtable);
+					if (rte->relkind ==RELKIND_RELATION)
+					{
+						Relation        rel;
+						Form_pg_attribute att;
+						rel = table_open(rte->relid, NoLock);
+						att = TupleDescAttr(rel->rd_att, var->varattno - 1);
+
+						if (att->attnotnull && !check_null_side(context->root, relid, context))
+						{
+							bool		result;
+
+							switch (ntest->nulltesttype)
+							{
+								case IS_NULL:
+									result = false;
+									break;
+								case IS_NOT_NULL:
+									result = true;
+									break;
+								default:
+									elog(ERROR, "unrecognized nulltesttype: %d",
+										 (int) ntest->nulltesttype);
+									result = false; /* keep compiler quiet */
+									break;
+							}
+							table_close(rel,NoLock);
+							return makeBoolConst(result, false);
+						}
+						table_close(rel,NoLock);
+					}
+				}
 				newntest = makeNode(NullTest);
 				newntest->arg = (Expr *) arg;
 				newntest->nulltesttype = ntest->nulltesttype;
@@ -3572,6 +3635,131 @@ eval_const_expressions_mutator(Node *node,
 	return ece_generic_processing(node);
 }
 
+/*
+ * Check a relation attributes on nullable side of the outer join.
+ */
+static bool
+check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context)
+{
+	ListCell   *l;
+	check_null_side_state *state	 = context->state;
+
+	if (!state)
+	{
+		state = collect_jointree_data((Node *) root->parse->jointree);
+		context->state = state;
+	}
+
+	/* if no outer joins the relation is not on nullable side */
+	if (state == NULL || !state->contains_outer)
+		return false;
+
+	/* scan the state and check relation on nullable outer join side */
+	foreach(l, state->sub_states)
+	{
+		check_null_side_state *sub_state = (check_null_side_state *) lfirst(l);
+
+		if (sub_state->contains_outer)
+		{
+			if (sub_state->jointype == JOIN_LEFT)
+			{
+				check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+				if (bms_is_member(relid, right_state->relids))
+					return true;
+			}
+
+			if (sub_state->jointype == JOIN_RIGHT)
+			{
+				check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+
+				if (bms_is_member(relid, left_state->relids))
+					return true;
+			}
+
+			if (sub_state->jointype == JOIN_FULL)
+			{
+				check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+				check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+				if (bms_is_member(relid, left_state->relids) ||bms_is_member(relid, right_state->relids))
+					return true;
+			}
+
+		}
+	}
+	return false;
+}
+
+/*
+ * Collecting data about join. this function is similar to reduce_outer_joins_pass1
+ *
+ * Returns a state node describing the given jointree node.
+ */
+static check_null_side_state *
+collect_jointree_data(Node *jtnode)
+{
+	check_null_side_state *result;
+
+	result = (check_null_side_state *)
+		palloc(sizeof(check_null_side_state));
+	result->relids = NULL;
+	result->contains_outer = false;
+	result->sub_states = NIL;
+
+	if (jtnode == NULL)
+		return result;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		result->relids = bms_make_singleton(varno);
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		foreach(l, f->fromlist)
+		{
+			check_null_side_state *sub_state;
+
+			sub_state = collect_jointree_data(lfirst(l));
+			result->relids = bms_add_members(result->relids,
+											 sub_state->relids);
+			result->contains_outer |= sub_state->contains_outer;
+			result->sub_states = lappend(result->sub_states, sub_state);
+		}
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+		check_null_side_state *sub_state;
+
+		/* join's own RT index is not wanted in result->relids */
+		if (IS_OUTER_JOIN(j->jointype))
+			result->contains_outer = true;
+
+		sub_state = collect_jointree_data(j->larg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		sub_state->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+
+		sub_state = collect_jointree_data(j->rarg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		result->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+	return result;
+}
+
 /*
  * Subroutine for eval_const_expressions: check for non-Const nodes.
  *
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..a42f58c9f8 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4577,6 +4577,32 @@ SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0
   1 |     
 (1 row)
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                QUERY PLAN                
+------------------------------------------
+ Hash Left Join
+   Hash Cond: (b.a_id = a.id)
+   Filter: ((a.id IS NULL) OR (a.id > 0))
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(6 rows)
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Right Join
+   Hash Cond: (b.a_id = a.id)
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Bitmap Heap Scan on a
+               Recheck Cond: (id > 0)
+               ->  Bitmap Index Scan on a_pkey
+                     Index Cond: (id > 0)
+(8 rows)
+
 rollback;
 -- another join removal bug: this is not optimizable, either
 begin;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index c441049f41..ef2df889bd 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -903,6 +903,37 @@ select unique1, unique2 from onek2
        0 |     998
 (2 rows)
 
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1403e0ffe7..4146562c3b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1589,6 +1589,11 @@ INSERT INTO b VALUES (0, 0), (1, NULL);
 SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 rollback;
 
 -- another join removal bug: this is not optimizable, either
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index b5929b2eca..13c777863e 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -234,6 +234,19 @@ select unique1, unique2 from onek2
 select unique1, unique2 from onek2
   where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
 
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --

Pavel Borisov

pashkin.elfe@gmail.com

about 5 years ago

In reply to: Surafel Temesgen (#7)

Re: Evaluate expression at planning time for two more cases

Thank you for working on this!
I got slightly into this patch. I can be wrong, but my opinion is that planner/optimizer-related changes are not without dangers generally. So anyway, they should be justified by performance increase, or the previous behavior should be considered totally wrong. Patching the thing which is just a little sub-optimal seems for me seems not necessary.

So it would be very good to see measurements of a performance gain from this patch. And also I think tests with partitioned and inherited relations for demonstration of the right work in the cases discussed in the thread should be a must-do for this patch.

--
Best regards,
Pavel Borisov

Postgres Professional: http://postgrespro.com

The new status of this patch is: Waiting on Author

Surafel Temesgen

surafel3000@gmail.com

about 5 years ago

In reply to: Surafel Temesgen (#7)

Re: Evaluate expression at planning time for two more cases

Hi Pavel Borisov,
It's always good to select the optimal way even if it didn't have
performance gain
but in case of this patch i see 4x speed up on my laptop and it will work
on any
table that have NULL constraint

regards
Surafel

#10

Ibrar Ahmed

ibrar.ahmad@gmail.com

almost 5 years ago

In reply to: Surafel Temesgen (#9)

1 attachment(s)

Re: Evaluate expression at planning time for two more cases

On Tue, Nov 24, 2020 at 12:47 PM Surafel Temesgen <surafel3000@gmail.com>
wrote:

Hi Pavel Borisov,
It's always good to select the optimal way even if it didn't have
performance gain
but in case of this patch i see 4x speed up on my laptop and it will work
on any
table that have NULL constraint

regards
Surafel

The patch (null_check_on_pkey_optimization_v2.patch) does not apply
successfully.
http://cfbot.cputube.org/patch_32_2699.log

1 out of 10 hunks FAILED -- saving rejects to file
src/backend/optimizer/util/clauses.c.rej

It was a minor change therefore I rebased the patch, please take a look.

--
Ibrar Ahmed

Attachments:

null_check_on_pkey_optimization_v3.patchapplication/octet-stream; name=null_check_on_pkey_optimization_v3.patchDownload

diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index f3786dd2b6..aa09541439 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -20,8 +20,10 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/table.h"
 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_constraint.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
@@ -40,6 +42,7 @@
 #include "optimizer/plancat.h"
 #include "optimizer/planmain.h"
 #include "parser/analyze.h"
+#include "parser/parsetree.h"
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_func.h"
@@ -51,9 +54,18 @@
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "utils/rel.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
+typedef struct check_null_side_state
+{
+	Relids		relids;			/* base relids within this subtree */
+	bool		contains_outer; /* does subtree contain outer join(s)? */
+	JoinType	jointype;		/* type of join */
+	List	   *sub_states;		/* List of states for subtree components */
+} check_null_side_state;
+
 typedef struct
 {
 	ParamListInfo boundParams;
@@ -61,6 +73,7 @@ typedef struct
 	List	   *active_fns;
 	Node	   *case_val;
 	bool		estimate;
+	check_null_side_state *state;
 } eval_const_expressions_context;
 
 typedef struct
@@ -147,6 +160,9 @@ static Query *substitute_actual_srf_parameters(Query *expr,
 											   int nargs, List *args);
 static Node *substitute_actual_srf_parameters_mutator(Node *node,
 													  substitute_actual_srf_parameters_context *context);
+static bool check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context);
+static check_null_side_state * collect_jointree_data(Node *jtnode);
+
 
 
 /*****************************************************************************
@@ -2031,6 +2047,7 @@ eval_const_expressions(PlannerInfo *root, Node *node)
 	context.active_fns = NIL;	/* nothing being recursively simplified */
 	context.case_val = NULL;	/* no CASE being examined */
 	context.estimate = false;	/* safe transformations only */
+	context.state= NULL;
 	return eval_const_expressions_mutator(node, &context);
 }
 
@@ -2361,6 +2378,7 @@ eval_const_expressions_mutator(Node *node,
 					{
 						has_null_input |= ((Const *) lfirst(arg))->constisnull;
 						all_null_input &= ((Const *) lfirst(arg))->constisnull;
+
 					}
 					else
 						has_nonconst_input = true;
@@ -3122,7 +3140,52 @@ eval_const_expressions_mutator(Node *node,
 
 					return makeBoolConst(result, false);
 				}
+				if (IsA(arg, Var) &&
+					((Var *) arg)->varlevelsup == 0 && context->root)
+				{
+					/*
+					 * Evaluate the test if it is on NOT NULL Constraint column and the
+					 * relation is not mentioned on nullable side of outer
+					 * join
+					 */
+					Var		   *var = (Var *) arg;
+					Query	   *parse = context->root->parse;
+					int			relid;
+					RangeTblEntry *rte;
+
+					relid = var->varno;
+					rte = rt_fetch(relid, parse->rtable);
+					if (rte->relkind ==RELKIND_RELATION)
+					{
+						Relation        rel;
+						Form_pg_attribute att;
+						rel = table_open(rte->relid, NoLock);
+						att = TupleDescAttr(rel->rd_att, var->varattno - 1);
 
+						if (att->attnotnull && !check_null_side(context->root, relid, context))
+						{
+							bool		result;
+
+							switch (ntest->nulltesttype)
+							{
+								case IS_NULL:
+									result = false;
+									break;
+								case IS_NOT_NULL:
+									result = true;
+									break;
+								default:
+									elog(ERROR, "unrecognized nulltesttype: %d",
+										 (int) ntest->nulltesttype);
+									result = false; /* keep compiler quiet */
+									break;
+							}
+							table_close(rel,NoLock);
+							return makeBoolConst(result, false);
+						}
+						table_close(rel,NoLock);
+					}
+				}
 				newntest = makeNode(NullTest);
 				newntest->arg = (Expr *) arg;
 				newntest->nulltesttype = ntest->nulltesttype;
@@ -3312,6 +3375,131 @@ eval_const_expressions_mutator(Node *node,
 	return ece_generic_processing(node);
 }
 
+/*
+ * Check a relation attributes on nullable side of the outer join.
+ */
+static bool
+check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context)
+{
+	ListCell   *l;
+	check_null_side_state *state	 = context->state;
+
+	if (!state)
+	{
+		state = collect_jointree_data((Node *) root->parse->jointree);
+		context->state = state;
+	}
+
+	/* if no outer joins the relation is not on nullable side */
+	if (state == NULL || !state->contains_outer)
+		return false;
+
+	/* scan the state and check relation on nullable outer join side */
+	foreach(l, state->sub_states)
+	{
+		check_null_side_state *sub_state = (check_null_side_state *) lfirst(l);
+
+		if (sub_state->contains_outer)
+		{
+			if (sub_state->jointype == JOIN_LEFT)
+			{
+				check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+				if (bms_is_member(relid, right_state->relids))
+					return true;
+			}
+
+			if (sub_state->jointype == JOIN_RIGHT)
+			{
+				check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+
+				if (bms_is_member(relid, left_state->relids))
+					return true;
+			}
+
+			if (sub_state->jointype == JOIN_FULL)
+			{
+				check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+				check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+				if (bms_is_member(relid, left_state->relids) ||bms_is_member(relid, right_state->relids))
+					return true;
+			}
+
+		}
+	}
+	return false;
+}
+
+/*
+ * Collecting data about join. this function is similar to reduce_outer_joins_pass1
+ *
+ * Returns a state node describing the given jointree node.
+ */
+static check_null_side_state *
+collect_jointree_data(Node *jtnode)
+{
+	check_null_side_state *result;
+
+	result = (check_null_side_state *)
+		palloc(sizeof(check_null_side_state));
+	result->relids = NULL;
+	result->contains_outer = false;
+	result->sub_states = NIL;
+
+	if (jtnode == NULL)
+		return result;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		result->relids = bms_make_singleton(varno);
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		foreach(l, f->fromlist)
+		{
+			check_null_side_state *sub_state;
+
+			sub_state = collect_jointree_data(lfirst(l));
+			result->relids = bms_add_members(result->relids,
+											 sub_state->relids);
+			result->contains_outer |= sub_state->contains_outer;
+			result->sub_states = lappend(result->sub_states, sub_state);
+		}
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+		check_null_side_state *sub_state;
+
+		/* join's own RT index is not wanted in result->relids */
+		if (IS_OUTER_JOIN(j->jointype))
+			result->contains_outer = true;
+
+		sub_state = collect_jointree_data(j->larg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		sub_state->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+
+		sub_state = collect_jointree_data(j->rarg);
+		result->jointype = j->jointype;
+		result->relids = bms_add_members(result->relids,
+										 sub_state->relids);
+		result->contains_outer |= sub_state->contains_outer;
+		result->sub_states = lappend(result->sub_states, sub_state);
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+	return result;
+}
+
 /*
  * Subroutine for eval_const_expressions: check for non-Const nodes.
  *
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 5c7528c029..e9ef4d0871 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4678,6 +4678,32 @@ SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0
   1 |     
 (1 row)
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                QUERY PLAN                
+------------------------------------------
+ Hash Left Join
+   Hash Cond: (b.a_id = a.id)
+   Filter: ((a.id IS NULL) OR (a.id > 0))
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(6 rows)
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Right Join
+   Hash Cond: (b.a_id = a.id)
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Bitmap Heap Scan on a
+               Recheck Cond: (id > 0)
+               ->  Bitmap Index Scan on a_pkey
+                     Index Cond: (id > 0)
+(8 rows)
+
 rollback;
 -- another join removal bug: this is not optimizable, either
 begin;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index c441049f41..ef2df889bd 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -903,6 +903,37 @@ select unique1, unique2 from onek2
        0 |     998
 (2 rows)
 
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 6a209a27aa..b2945bc0f8 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1639,6 +1639,11 @@ INSERT INTO b VALUES (0, 0), (1, NULL);
 SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
 rollback;
 
 -- another join removal bug: this is not optimizable, either
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index b5929b2eca..13c777863e 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -234,6 +234,19 @@ select unique1, unique2 from onek2
 select unique1, unique2 from onek2
   where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
 
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+
 --
 -- Test some corner cases that have been known to confuse the planner
 --

#11

Surafel Temesgen

surafel3000@gmail.com

almost 5 years ago

In reply to: Ibrar Ahmed (#10)

Re: Evaluate expression at planning time for two more cases

Hi Ibrar,

On Mon, Mar 8, 2021 at 8:13 AM Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

It was a minor change therefore I rebased the patch, please take a look.

It is perfect thank you

regards
Surafel

#12

David Rowley

dgrowleyml@gmail.com

over 4 years ago

In reply to: Tom Lane (#4)

Re: Evaluate expression at planning time for two more cases

On Tue, 8 Sept 2020 at 13:46, Tom Lane <tgl@sss.pgh.pa.us> wrote:

I've been doing some handwaving about changing the representation
of Vars, with an eye to making it clear by inspection whether a
given Var is nullable by some lower outer join [2]. If that work
ever comes to fruition then the need for "check_null_side" would
go away. So maybe we should put this idea on the back burner
until that happens.

I looked at this patch too. I agree that we should delay adding any
new smarts in regards to NULL or NOT NULL until we have some more
robust infrastructure to make this sort of patch easier and cheaper.

My vote is to just return this patch with feedback. Maybe Surafel
will be interested in pursuing this later when we have better
infrastructure or perhaps helping review the patch you're talking
about.

David

#13

David Rowley

dgrowleyml@gmail.com

over 4 years ago

In reply to: Ibrar Ahmed (#10)

Re: Evaluate expression at planning time for two more cases

On Tue, 9 Mar 2021 at 05:13, Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

It was a minor change therefore I rebased the patch, please take a look.

I only had a quick look at the v3 patch.

+ rel = table_open(rte->relid, NoLock);
+ att = TupleDescAttr(rel->rd_att, var->varattno - 1);

+ if (att->attnotnull && !check_null_side(context->root, relid, context))

This is not really an acceptable way to determine the notnull
attribute value. Andy Fan proposes a much better way in [1]/messages/by-id/CAKU4AWpQjAqJwQ2X-aR9g3+ZHRzU1k8hNP7A+_mLuOv-n5aVKA@mail.gmail.com.
RelOptInfo is meant to cache the required Relation data that we need
during query planning. IIRC, Andy's patch correctly uses this and does
so in an efficient way.

In any case, as you can see there's a bit of other work going on to
smarten up the planner around NULL value detection. The UniqueKeys
patch requires this and various other things have come up that really
should be solved.

Surafel, I'd suggest we return this patch with feedback and maybe you
could instead help reviewing the other patches in regards to the NOT
NULL tracking and maybe come back to this once the dust has settled
and everyone is clear on how we determine if a column is NULL or not.

Let me know your thoughts.

David

[1]: /messages/by-id/CAKU4AWpQjAqJwQ2X-aR9g3+ZHRzU1k8hNP7A+_mLuOv-n5aVKA@mail.gmail.com

#14

Jaime Casanova

jcasanov@systemguards.com.ec

over 4 years ago

In reply to: David Rowley (#13)

Re: Evaluate expression at planning time for two more cases

On Tue, Jul 06, 2021 at 01:09:56PM +1200, David Rowley wrote:

On Tue, 9 Mar 2021 at 05:13, Ibrar Ahmed <ibrar.ahmad@gmail.com> wrote:

It was a minor change therefore I rebased the patch, please take a look.

[...]

This is not really an acceptable way to determine the notnull
attribute value. Andy Fan proposes a much better way in [1].
RelOptInfo is meant to cache the required Relation data that we need
during query planning. IIRC, Andy's patch correctly uses this and does
so in an efficient way.

In any case, as you can see there's a bit of other work going on to
smarten up the planner around NULL value detection. The UniqueKeys
patch requires this and various other things have come up that really
should be solved.

Surafel, I'd suggest we return this patch with feedback and maybe you
could instead help reviewing the other patches in regards to the NOT
NULL tracking and maybe come back to this once the dust has settled
and everyone is clear on how we determine if a column is NULL or not.

Let me know your thoughts.

Hi Surafel,

We haven't seen an answer from you on this.
I'm marking the patch as "Returned with feedback" as was suggested.

--
Jaime Casanova
Director de Servicios Profesionales
SystemGuards - Consultores de PostgreSQL