Convert coalesce to or/and

Started by Nicolas Adenis-Lamarre22 days ago4 messages
#1Nicolas Adenis-Lamarre
nicolas.adenis.lamarre@gmail.com

I noticed that in the code coalesce is optimized by removing values
after non null variables.
Before seeing that, i would think that functions would not have been
optimized because in a sens, it hardcodes its behavior (i guess
coalesce could be overwritten).

So my question is :
would it be accepted to do a patch to replace coalesce by and/or,
mainly to fix related estimations.
This is an a mistake i fix from times to times on developments.
It could be restricted to coalesce containing only simple columns variables.

example:

explain analyze
select *
from people p
where coalesce(firstname, lastname) = 'Louis'
-- Seq Scan on people p (rows=732) (actual rows=3856.00 loops=1)
-- always computed to 732 for any value

explain analyze
select *
from people p
where firstname = 'Louis' or (firstname is null and lastname = 'Louis')
-- Seq Scan on people p (rows=3862) (actual rows=3856.00 loops=1)

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Nicolas Adenis-Lamarre (#1)
Re: Convert coalesce to or/and

Nicolas Adenis-Lamarre <nicolas.adenis.lamarre@gmail.com> writes:

would it be accepted to do a patch to replace coalesce by and/or,
mainly to fix related estimations.

Almost certainly not. It'd be very hard to do that while preserving
the expected semantics of COALESCE: no argument is to be evaluated
more than once, and people sometimes expect strict left-to-right
evaluation. I've even seen it used as an intentional optimization
fence.

If you think you can improve the estimation around it, I'd suggest
tackling that directly.

regards, tom lane

#3Nicolas Adenis-Lamarre
nicolas.adenis.lamarre@gmail.com
In reply to: Tom Lane (#2)
1 attachment(s)
Re: Convert coalesce to or/and

I attached a patch proposition.

explain analyze
select *
from people p
where coalesce(firstname, lastname) = 'Louis'
-- before: Seq Scan on people p (cost=0.00..4015.04 rows=732
width=321) (actual time=0.019..11.217 rows=3856.00 loops=1)
-- after: Seq Scan on people p (cost=0.00..4015.04 rows=3872
width=177) (actual time=0.026..13.730 rows=3856.00 loops=1)

explain analyze
select *
from people p
where firstname = 'Louis' or (firstname is null and lastname = 'Louis')
-- Seq Scan on people p (cost=0.00..4381.24 rows=3872 width=177)
(actual time=0.016..14.899 rows=3856.00 loops=1)

Nicolas

Attachments:

v1-0001-Estimate-coalesce-returned-rows-as-done-for-or-claus.patchtext/x-patch; charset=US-ASCII; name=v1-0001-Estimate-coalesce-returned-rows-as-done-for-or-claus.patchDownload
From 1a64554b5498d6d573293388dfc99ca25f01f1a1 Mon Sep 17 00:00:00 2001
From: Nicolas Adenis-Lamarre <nicolas.adenis.lamarre.pro@gmail.com>
Date: Sat, 3 Jan 2026 16:18:31 +0100
Subject: [PATCH] Estimate coalesce returned rows as done for or clauses

In order to get more precise estimations on coalesce function calls,
at the selectivity compute, replace the node by the equivalent using
or expressions.
This estimation is limited to expression of the form :
COALESCE(...) = VAR
or
COALESCE(...) = CONST
---
 src/backend/optimizer/path/clausesel.c | 169 +++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 25c4d177ad9..2cfa6c0edf3 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -23,6 +23,7 @@
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
+#include "nodes/makefuncs.h"
 
 /*
  * Data structure for accumulating info about possible range-query
@@ -49,6 +50,16 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
 											 SpecialJoinInfo *sjinfo,
 											 bool use_extended_stats);
 
+static Selectivity clauselist_selectivity_coalesce_op_expr(PlannerInfo *root,
+														   CoalesceExpr* cexpr,
+														   Expr* expr,
+														   Oid opno,
+														   Oid inputcollid,
+														   int varRelid,
+														   JoinType jointype,
+														   SpecialJoinInfo *sjinfo,
+														   bool use_extended_stats);
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -418,6 +429,72 @@ clauselist_selectivity_or(PlannerInfo *root,
 	return s1;
 }
 
+/*
+ * clauselist_selectivity_coalesce_op_expr -
+      Compute selectivity for expression having form:
+         COALESCE(...) OP VAR
+         COALESCE(...) OP CONST
+      by using clauselist_selectivity_or instead of the generic function selectivity.
+      To archieve that, it converts the coalesce clause into a list of or clauses.
+ */
+static Selectivity
+clauselist_selectivity_coalesce_op_expr(PlannerInfo *root,
+										CoalesceExpr* cexpr,
+										Expr* expr,
+										Oid opno,
+										Oid inputcollid,
+										int varRelid,
+										JoinType jointype,
+										SpecialJoinInfo *sjinfo,
+										bool use_extended_stats)
+{
+	ListCell     *lc;
+	List         *or_clauses = NIL;
+	List         *previous_null_clauses = NIL;
+
+	/* build new clauses for each coalesce element
+	   foreach each element, check that the previous one are NULL
+       the is null checks are copied and reused for the next loop.
+    */
+	foreach(lc, cexpr->args)
+	{
+		Expr *e = (Expr *) lfirst(lc);
+		NullTest *ntest;
+
+		/* build e OP expr */
+		Node *opexpr = (Node *) make_opclause(opno, BOOLOID, false, e, expr, InvalidOid, inputcollid);
+		if(previous_null_clauses == NIL) {
+			or_clauses = lappend(or_clauses, opexpr);
+		} else {
+			Expr *andexpr;
+
+			/*
+			  do a copy because the previous_null_clauses evolves during the loop on coalesce expressions
+			 */
+			List *and_clauses = copyObject(previous_null_clauses);
+			and_clauses = lappend(and_clauses, opexpr);
+			andexpr = makeBoolExpr(AND_EXPR, and_clauses, -1);
+			or_clauses = lappend(or_clauses, andexpr);
+		}
+
+		/* e IS NULL check */
+		ntest = makeNode(NullTest);
+		ntest->arg = e;
+		ntest->nulltesttype = IS_NULL;
+		ntest->argisrow = false;
+		ntest->location = -1;
+		
+		previous_null_clauses = lappend(previous_null_clauses, ntest);
+	}
+
+	return clauselist_selectivity_or(root,
+									 or_clauses,
+									 varRelid,
+									 jointype,
+									 sjinfo,
+									 use_extended_stats);
+}
+
 /*
  * addRangeClause --- add a new range clause for clauselist_selectivity
  *
@@ -624,6 +701,72 @@ treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
 	}
 }
 
+/*
+  Identify if a clause has a coalesce part to be eligible to a specific coalesce selectivity computation
+*/
+static inline CoalesceExpr* get_coalesce_op_expr_clause_coalesce_part_recurse(Node* clause) {
+	if(IsA(clause, CoalesceExpr))     return ((CoalesceExpr*) clause);
+	else if(IsA(clause, RelabelType)) return get_coalesce_op_expr_clause_coalesce_part_recurse((Node*) ((RelabelType*)clause)->arg);
+	return NULL;
+}
+
+static inline CoalesceExpr* get_coalesce_op_expr_clause_coalesce_part(Node* clause) {
+	Expr	   *leftOp;
+	Expr	   *rightOp;
+
+	if(!is_opclause(clause)) return false;
+	leftOp  = (Expr *) get_leftop(clause);
+	rightOp = (Expr *) get_rightop(clause);
+
+	if(leftOp != NULL) {
+		CoalesceExpr* cexpr = get_coalesce_op_expr_clause_coalesce_part_recurse((Node*)leftOp);
+		if(cexpr != NULL) return cexpr;
+	}
+
+	if(rightOp != NULL) {
+		CoalesceExpr* cexpr = get_coalesce_op_expr_clause_coalesce_part_recurse((Node*)rightOp);
+		if(cexpr != NULL) return cexpr;
+	}
+
+	return NULL;
+}
+
+/*
+ * Identify if a clause has a simple expression part to be eligible to a specific coalesce selectivity computation
+ * actually, only var and const are considered
+ * while it seems where the benefic seems obvious
+*/
+static inline Expr* get_coalesce_op_expr_clause_expression_part_recurse(Node* clause) {
+	if(IsA(clause, Const)) return ((Expr*)clause);
+	else if(IsA(clause, Var)) return ((Expr*)clause);
+	else if(IsA(clause, RelabelType)) return get_coalesce_op_expr_clause_expression_part_recurse((Node*)((RelabelType*)clause)->arg);
+	return NULL;
+}
+
+/*
+ *  - get_coalesce_op_expr_clause_expression_part
+ *	  Detect nodes of the form coalesce(...) = <simple expression (var/constant)>
+ */
+static inline Expr* get_coalesce_op_expr_clause_expression_part(Node* clause) {
+	Expr	   *leftOp;
+	Expr	   *rightOp;
+
+	if(!is_opclause(clause)) return false;
+	leftOp  = (Expr *) get_leftop(clause);
+	rightOp = (Expr *) get_rightop(clause);
+
+	if(leftOp != NULL) {
+		Expr* expr = get_coalesce_op_expr_clause_expression_part_recurse((Node*)leftOp);
+		if(expr != NULL) return expr;
+	}
+
+	if(rightOp != NULL) {
+		Expr* expr = get_coalesce_op_expr_clause_expression_part_recurse((Node*)rightOp);
+		if(expr != NULL) return expr;
+	}
+
+	return NULL;
+}
 
 /*
  * clause_selectivity -
@@ -833,6 +976,18 @@ clause_selectivity_ext(PlannerInfo *root,
 		OpExpr	   *opclause = (OpExpr *) clause;
 		Oid			opno = opclause->opno;
 
+		/*
+		 * if the opclause is composed of:
+		 * - a coalesce side
+		 * - a simple expression
+		 * a specific estimate is done for the coalesce function
+		 */
+		CoalesceExpr *coalesce_cexpr = get_coalesce_op_expr_clause_coalesce_part(clause);
+		Expr *coalesce_expr = NULL;
+		if(coalesce_cexpr != NULL) {
+			coalesce_expr = get_coalesce_op_expr_clause_expression_part(clause);
+		}
+
 		if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
 		{
 			/* Estimate selectivity for a join clause. */
@@ -842,6 +997,20 @@ clause_selectivity_ext(PlannerInfo *root,
 								  jointype,
 								  sjinfo);
 		}
+		else if (coalesce_cexpr != NULL && coalesce_expr != NULL) {
+			/* 
+			 * Almost the same thing as is_orclause
+			 */
+			s1 = clauselist_selectivity_coalesce_op_expr(root,
+														 coalesce_cexpr,
+														 coalesce_expr,
+														 ((OpExpr *) clause)->opno,
+														 ((OpExpr *) clause)->inputcollid,
+														 varRelid,
+														 jointype,
+														 sjinfo,
+														 use_extended_stats);
+		}
 		else
 		{
 			/* Estimate selectivity for a restriction clause. */
-- 
2.34.1

#4Nicolas Adenis-Lamarre
nicolas.adenis.lamarre@gmail.com
In reply to: Nicolas Adenis-Lamarre (#3)
1 attachment(s)
Re: Convert coalesce to or/and

I did a version 2
while it think we should avoid it if the coalesce is not composed of
simple elements.

Nicolas

Attachments:

v2-0001-Estimate-coalesce-returned-rows-as-done-for-or-claus.patchtext/x-patch; charset=US-ASCII; name=v2-0001-Estimate-coalesce-returned-rows-as-done-for-or-claus.patchDownload
From b0f550eb6e6c6046fe5a6b36910a03f18cde91f5 Mon Sep 17 00:00:00 2001
From: Nicolas Adenis-Lamarre <nicolas.adenis.lamarre.pro@gmail.com>
Date: Sat, 3 Jan 2026 16:18:31 +0100
Subject: [PATCH] Estimate coalesce returned rows as done for or clauses

In order to get more precise estimations on coalesce function calls,
at the selectivity compute, replace the node by the equivalent using
or expressions.
This estimation is limited to expression of the form :
COALESCE(...) = VAR
or
COALESCE(...) = CONST
---
 src/backend/optimizer/path/clausesel.c | 182 +++++++++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 25c4d177ad9..2a67857c3f6 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -23,6 +23,7 @@
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
+#include "nodes/makefuncs.h"
 
 /*
  * Data structure for accumulating info about possible range-query
@@ -49,6 +50,16 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
 											 SpecialJoinInfo *sjinfo,
 											 bool use_extended_stats);
 
+static Selectivity clauselist_selectivity_coalesce_op_expr(PlannerInfo *root,
+														   CoalesceExpr* cexpr,
+														   Expr* expr,
+														   Oid opno,
+														   Oid inputcollid,
+														   int varRelid,
+														   JoinType jointype,
+														   SpecialJoinInfo *sjinfo,
+														   bool use_extended_stats);
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -418,6 +429,73 @@ clauselist_selectivity_or(PlannerInfo *root,
 	return s1;
 }
 
+/*
+ * clauselist_selectivity_coalesce_op_expr -
+ *    Compute selectivity for expression having form:
+ *       COALESCE(...) OP VAR
+ *       COALESCE(...) OP CONST
+ *    by using clauselist_selectivity_or instead of the generic function selectivity.
+ *    To archieve that, it converts the coalesce clause into a list of 'or' clauses.
+ */
+static Selectivity
+clauselist_selectivity_coalesce_op_expr(PlannerInfo *root,
+										CoalesceExpr* cexpr,
+										Expr* expr,
+										Oid opno,
+										Oid inputcollid,
+										int varRelid,
+										JoinType jointype,
+										SpecialJoinInfo *sjinfo,
+										bool use_extended_stats)
+{
+    ListCell     *lc;
+    List         *or_clauses = NIL;
+    List         *previous_null_clauses = NIL;
+
+    /*
+     * build new clauses for each coalesce element
+     * foreach each element, check that the previous ones are NULL
+     * the is null checks are copied and reused for the next loop.
+     */
+    foreach(lc, cexpr->args)
+    {
+		Expr *e = (Expr *) lfirst(lc);
+		NullTest *ntest;
+
+		/* build e OP expr */
+		Node *opexpr = (Node *) make_opclause(opno, BOOLOID, false, e, expr, InvalidOid, inputcollid);
+		if(previous_null_clauses == NIL) {
+			or_clauses = lappend(or_clauses, opexpr);
+		} else {
+			Expr *andexpr;
+
+			/*
+			  do a copy because the previous_null_clauses evolves during the loop on coalesce expressions
+			 */
+			List *and_clauses = copyObject(previous_null_clauses);
+			and_clauses = lappend(and_clauses, opexpr);
+			andexpr = makeBoolExpr(AND_EXPR, and_clauses, -1);
+			or_clauses = lappend(or_clauses, andexpr);
+		}
+
+		/* e IS NULL check */
+		ntest = makeNode(NullTest);
+		ntest->arg = e;
+		ntest->nulltesttype = IS_NULL;
+		ntest->argisrow = false;
+		ntest->location = -1;
+		
+		previous_null_clauses = lappend(previous_null_clauses, ntest);
+    }
+
+    return clauselist_selectivity_or(root,
+                                     or_clauses,
+                                     varRelid,
+                                     jointype,
+                                     sjinfo,
+                                     use_extended_stats);
+}
+
 /*
  * addRangeClause --- add a new range clause for clauselist_selectivity
  *
@@ -624,6 +702,84 @@ treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
 	}
 }
 
+/*
+  Identify if a clause has a coalesce part to be eligible to a specific coalesce selectivity computation
+*/
+static inline CoalesceExpr* get_coalesce_op_expr_clause_coalesce_part_recurse(Node* clause) {
+	if(IsA(clause, CoalesceExpr))     return ((CoalesceExpr*) clause);
+	else if(IsA(clause, RelabelType)) return get_coalesce_op_expr_clause_coalesce_part_recurse((Node*) ((RelabelType*)clause)->arg);
+	return NULL;
+}
+
+/*
+ * Identify if a clause has a simple expression part to be eligible to a specific coalesce selectivity computation
+ * actually, only var and const are considered
+ * while it seems where the benefic seems obvious
+*/
+static inline Expr* get_coalesce_op_expr_clause_expression_part_recurse(Node* clause) {
+	if(IsA(clause, Const)) return ((Expr*)clause);
+	else if(IsA(clause, Var)) return ((Expr*)clause);
+	else if(IsA(clause, RelabelType)) return get_coalesce_op_expr_clause_expression_part_recurse((Node*)((RelabelType*)clause)->arg);
+	return NULL;
+}
+
+static inline CoalesceExpr* get_coalesce_op_expr_clause_coalesce_part(Node* clause) {
+	Expr	   *leftOp;
+	Expr	   *rightOp;
+	CoalesceExpr* cexpr = NULL;
+
+	if(!is_opclause(clause)) return NULL;
+	leftOp  = (Expr *) get_leftop(clause);
+	rightOp = (Expr *) get_rightop(clause);
+
+	if(leftOp != NULL) {
+		cexpr = get_coalesce_op_expr_clause_coalesce_part_recurse((Node*)leftOp);
+	} else if(rightOp != NULL) {
+		cexpr = get_coalesce_op_expr_clause_coalesce_part_recurse((Node*)rightOp);
+	}
+
+	/*
+	 * Check that all the coalesce elements are simple element
+	 */
+	if(cexpr != NULL) {
+		ListCell *lc;
+
+		foreach(lc, cexpr->args)
+		{
+			Expr *e = (Expr *) lfirst(lc);
+			if(get_coalesce_op_expr_clause_expression_part_recurse((Node*)e) == NULL) {
+				return NULL;
+			}
+		}
+	}
+
+	return cexpr;
+}
+
+/*
+ *  - get_coalesce_op_expr_clause_expression_part
+ *	  Detect nodes of the form coalesce(...) = <simple expression (var/constant)>
+ */
+static inline Expr* get_coalesce_op_expr_clause_expression_part(Node* clause) {
+	Expr	   *leftOp;
+	Expr	   *rightOp;
+
+	if(!is_opclause(clause)) return false;
+	leftOp  = (Expr *) get_leftop(clause);
+	rightOp = (Expr *) get_rightop(clause);
+
+	if(leftOp != NULL) {
+		Expr* expr = get_coalesce_op_expr_clause_expression_part_recurse((Node*)leftOp);
+		if(expr != NULL) return expr;
+	}
+
+	if(rightOp != NULL) {
+		Expr* expr = get_coalesce_op_expr_clause_expression_part_recurse((Node*)rightOp);
+		if(expr != NULL) return expr;
+	}
+
+	return NULL;
+}
 
 /*
  * clause_selectivity -
@@ -833,6 +989,18 @@ clause_selectivity_ext(PlannerInfo *root,
 		OpExpr	   *opclause = (OpExpr *) clause;
 		Oid			opno = opclause->opno;
 
+		/*
+		 * if the opclause is composed of:
+		 * - a coalesce side composed of simple expressions
+		 * - a simple expression
+		 * a specific estimate is done for the coalesce function
+		 */
+		CoalesceExpr *coalesce_cexpr = get_coalesce_op_expr_clause_coalesce_part(clause);
+		Expr *coalesce_expr = NULL;
+		if(coalesce_cexpr != NULL) {
+			coalesce_expr = get_coalesce_op_expr_clause_expression_part(clause);
+		}
+
 		if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
 		{
 			/* Estimate selectivity for a join clause. */
@@ -842,6 +1010,20 @@ clause_selectivity_ext(PlannerInfo *root,
 								  jointype,
 								  sjinfo);
 		}
+		else if (coalesce_cexpr != NULL && coalesce_expr != NULL) {
+			/* 
+			 * Almost the same thing as is_orclause
+			 */
+			s1 = clauselist_selectivity_coalesce_op_expr(root,
+														 coalesce_cexpr,
+														 coalesce_expr,
+														 ((OpExpr *) clause)->opno,
+														 ((OpExpr *) clause)->inputcollid,
+														 varRelid,
+														 jointype,
+														 sjinfo,
+														 use_extended_stats);
+		}
 		else
 		{
 			/* Estimate selectivity for a restriction clause. */
-- 
2.34.1