commit f4b47feed8a8a2307121cfbecda7105930143f61
Author: Tom Lane <tgl@sss.pgh.pa.us>
Date:   Mon Jan 23 14:51:02 2023 -0500

    Don't use RestrictInfo.nullable_relids in join_clause_is_movable_to.
    
    Instead of using per-clause nullable_relids data, compute a
    per-baserel set of outer joins that can null each relation, and
    check for overlap between that and clause_relids to detect whether
    the clause can safely be pushed down to relation scan level.
    
    join_clause_is_movable_into also uses nullable_relids, but it
    turns out that that test can just be dropped entirely.  Now that
    clause_relids includes nulling outer joins, the preceding tests
    in the function are sufficient to reject clauses that can't be
    pushed down.
    
    This might seem like a net loss given that we have to add a bit
    of code to initsplan.c to compute RelOptInfo.nulling_relids.
    However, that's not much code at all, and the payoff is this:
    we no longer need RestrictInfo.nullable_relids at all.
    The next patch, which removes that field and the extensive
    infrastructure that maintains it, saves way more code and cycles
    than we add here.  Also, I think there are likely going to be
    other uses for RelOptInfo.nulling_relids.

diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 7db8291549..c285806a81 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -94,6 +94,8 @@ static void deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem,
 static void process_security_barrier_quals(PlannerInfo *root,
 										   int rti, Relids qualscope,
 										   bool below_outer_join);
+static void mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
+									 Relids lower_rels);
 static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
 										   Relids left_rels, Relids right_rels,
 										   Relids inner_join_rels,
@@ -976,6 +978,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode,
 													   j->rtindex);
 					root->outer_join_rels = bms_add_member(root->outer_join_rels,
 														   j->rtindex);
+					mark_rels_nulled_by_join(root, j->rtindex,
+											 right_item->qualscope);
 				}
 				jtitem->inner_join_rels = bms_union(left_item->inner_join_rels,
 													right_item->inner_join_rels);
@@ -1031,6 +1035,10 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode,
 												   j->rtindex);
 				root->outer_join_rels = bms_add_member(root->outer_join_rels,
 													   j->rtindex);
+				mark_rels_nulled_by_join(root, j->rtindex,
+										 left_item->qualscope);
+				mark_rels_nulled_by_join(root, j->rtindex,
+										 right_item->qualscope);
 				jtitem->inner_join_rels = bms_union(left_item->inner_join_rels,
 													right_item->inner_join_rels);
 				jtitem->left_rels = left_item->qualscope;
@@ -1345,6 +1353,33 @@ process_security_barrier_quals(PlannerInfo *root,
 	Assert(security_level <= root->qual_security_level);
 }
 
+/*
+ * mark_rels_nulled_by_join
+ *	  Fill RelOptInfo.nulling_relids of baserels nulled by this outer join
+ *
+ * Inputs:
+ *	ojrelid: RT index of the join RTE (must not be 0)
+ *	lower_rels: the base+OJ Relids syntactically below nullable side of join
+ */
+static void
+mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
+						 Relids lower_rels)
+{
+	int			relid = -1;
+
+	while ((relid = bms_next_member(lower_rels, relid)) > 0)
+	{
+		RelOptInfo *rel = root->simple_rel_array[relid];
+
+		if (rel == NULL)		/* must be an outer join */
+		{
+			Assert(bms_is_member(relid, root->outer_join_rels));
+			continue;
+		}
+		rel->nulling_relids = bms_add_member(rel->nulling_relids, ojrelid);
+	}
+}
+
 /*
  * make_outerjoininfo
  *	  Build a SpecialJoinInfo for the current outer join
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ebfb4ddd12..6e0e3a1a58 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -283,6 +283,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 		rel->top_parent = parent->top_parent ? parent->top_parent : parent;
 		rel->top_parent_relids = rel->top_parent->relids;
 
+		/*
+		 * A child rel is below the same outer joins as its parent.  (We
+		 * presume this info was already calculated for the parent.)
+		 */
+		rel->nulling_relids = parent->nulling_relids;
+
 		/*
 		 * Also propagate lateral-reference information from appendrel parent
 		 * rels to their child rels.  We intentionally give each child rel the
@@ -306,6 +312,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 		rel->parent = NULL;
 		rel->top_parent = NULL;
 		rel->top_parent_relids = NULL;
+		rel->nulling_relids = NULL;
 		rel->direct_lateral_relids = NULL;
 		rel->lateral_relids = NULL;
 		rel->lateral_referencers = NULL;
@@ -685,6 +692,7 @@ build_join_rel(PlannerInfo *root,
 	joinrel->max_attr = 0;
 	joinrel->attr_needed = NULL;
 	joinrel->attr_widths = NULL;
+	joinrel->nulling_relids = NULL;
 	joinrel->lateral_vars = NIL;
 	joinrel->lateral_referencers = NULL;
 	joinrel->indexlist = NIL;
@@ -874,6 +882,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->max_attr = 0;
 	joinrel->attr_needed = NULL;
 	joinrel->attr_widths = NULL;
+	joinrel->nulling_relids = NULL;
 	joinrel->lateral_vars = NIL;
 	joinrel->lateral_referencers = NULL;
 	joinrel->indexlist = NIL;
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 1350f011a6..e3b43a7cf3 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -618,8 +618,17 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
 	if (bms_is_member(baserel->relid, rinfo->outer_relids))
 		return false;
 
-	/* Target rel must not be nullable below the clause */
-	if (bms_is_member(baserel->relid, rinfo->nullable_relids))
+	/*
+	 * Target rel's Vars must not be nulled by any outer join.  We can check
+	 * this without groveling through the individual Vars by seeing whether
+	 * clause_relids (which includes all such Vars' varnullingrels) includes
+	 * any outer join that can null the target rel.  You might object that
+	 * this could reject the clause on the basis of an OJ relid that came from
+	 * some other rel's Var.  However, that would still mean that the clause
+	 * came from above that outer join and shouldn't be pushed down; so there
+	 * should be no false positives.
+	 */
+	if (bms_overlap(rinfo->clause_relids, baserel->nulling_relids))
 		return false;
 
 	/* Clause must not use any rels with LATERAL references to this rel */
@@ -651,16 +660,17 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
  * relation plus the outer rels.  We also check that it does reference at
  * least one current Var, ensuring that the clause will be pushed down to
  * a unique place in a parameterized join tree.  And we check that we're
- * not pushing the clause into its outer-join outer side, nor down into
- * a lower outer join's inner side.
- *
- * The check about pushing a clause down into a lower outer join's inner side
- * is only approximate; it sometimes returns "false" when actually it would
- * be safe to use the clause here because we're still above the outer join
- * in question.  This is okay as long as the answers at different join levels
- * are consistent: it just means we might sometimes fail to push a clause as
- * far down as it could safely be pushed.  It's unclear whether it would be
- * worthwhile to do this more precisely.  (But if it's ever fixed to be
+ * not pushing the clause into its outer-join outer side.
+ *
+ * We used to need to check that we're not pushing the clause into a lower
+ * outer join's inner side.  However, now that clause_relids includes
+ * references to potentially-nulling outer joins, the other tests handle that
+ * concern.  If the clause references any Var coming from the inside of a
+ * lower outer join, its clause_relids will mention that outer join, causing
+ * the evaluability check to fail; while if it references no such Vars, the
+ * references-a-target-rel check will fail.
+ *
+ * XXX not clear if we can do this yet: (But if it's ever fixed to be
  * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that
  * should be re-enabled.)
  *
@@ -704,14 +714,5 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
 	if (bms_overlap(currentrelids, rinfo->outer_relids))
 		return false;
 
-	/*
-	 * Target rel(s) must not be nullable below the clause.  This is
-	 * approximate, in the safe direction, because the current join might be
-	 * above the join where the nulling would happen, in which case the clause
-	 * would work correctly here.  But we don't have enough info to be sure.
-	 */
-	if (bms_overlap(currentrelids, rinfo->nullable_relids))
-		return false;
-
 	return true;
 }
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 630a044089..cbe99ca397 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -694,6 +694,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
  *				the attribute is needed as part of final targetlist
  *		attr_widths - cache space for per-attribute width estimates;
  *					  zero means not computed yet
+ *		nulling_relids - relids of outer joins that can null this rel
  *		lateral_vars - lateral cross-references of rel, if any (list of
  *					   Vars and PlaceHolderVars)
  *		lateral_referencers - relids of rels that reference this one laterally
@@ -927,6 +928,8 @@ typedef struct RelOptInfo
 	Relids	   *attr_needed pg_node_attr(read_write_ignore);
 	/* array indexed [min_attr .. max_attr] */
 	int32	   *attr_widths pg_node_attr(read_write_ignore);
+	/* relids of outer joins that can null this baserel */
+	Relids		nulling_relids;
 	/* LATERAL Vars and PHVs referenced by rel */
 	List	   *lateral_vars;
 	/* rels that reference this baserel laterally */
