From 68af739040a57f35de1e0084067b76218befcd22 Mon Sep 17 00:00:00 2001
From: Marina Polyakova <m.polyakova@postgrespro.ru>
Date: Mon, 17 Jul 2017 13:08:36 +0300
Subject: [PATCH v5 3/3] Precalculate stable functions, costs

Currently PostgreSQL precalculates only immutable functions; stable functions
are evaluated for every row, so in practice they do not differ from volatile
functions.

This patch includes:
- cost changes for cached expressions (according to their behaviour)
---
 src/backend/optimizer/path/costsize.c | 194 +++++++++++++++++++++++-----------
 1 file changed, 134 insertions(+), 60 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index eb653cf..0ce57d5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -140,6 +140,7 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
 			   PathKey *pathkey);
 static void cost_rescan(PlannerInfo *root, Path *path,
 			Cost *rescan_startup_cost, Cost *rescan_total_cost);
+static double cost_eval_cacheable_expr_per_tuple(CacheableExpr *node);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
 static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
 						  ParamPathInfo *param_info,
@@ -3474,6 +3475,123 @@ cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
 	*cost = context.total;
 }
 
+/*
+ * cost_eval_cacheable_expr_per_tuple
+ *		Estimate the per-tuple cost of an expression that can be cacheable.
+ *
+ * This function exists to avoid duplicating the costing code between plain
+ * expressions and their cached counterparts (CachedExpr).
+ */
+static double
+cost_eval_cacheable_expr_per_tuple(CacheableExpr *node)
+{
+	double		result;
+
+	/*
+	 * For each operator or function node in the given tree, we charge the
+	 * estimated execution cost given by pg_proc.procost (remember to multiply
+	 * this by cpu_operator_cost).
+	 *
+	 * Vars and Consts are charged zero, and so are boolean operators (AND,
+	 * OR, NOT). Simplistic, but a lot better than no model at all.
+	 */
+	if (IsA(node, ArrayRef) ||
+		IsA(node, BoolExpr) ||
+		IsA(node, FieldSelect) ||
+		IsA(node, RelabelType) ||
+		IsA(node, ConvertRowtypeExpr) ||
+		IsA(node, CaseExpr) ||
+		IsA(node, CaseTestExpr) ||
+		IsA(node, ArrayExpr) ||
+		IsA(node, RowExpr) ||
+		IsA(node, CoalesceExpr) ||
+		IsA(node, MinMaxExpr) ||
+		IsA(node, SQLValueFunction) ||
+		IsA(node, XmlExpr) ||
+		IsA(node, NullTest) ||
+		IsA(node, BooleanTest) ||
+		IsA(node, CoerceToDomain) ||
+		IsA(node, CoerceToDomainValue))
+	{
+		result = 0;
+	}
+	else if (IsA(node, FuncExpr))
+	{
+		result = get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
+	}
+	else if (IsA(node, OpExpr) ||
+			 IsA(node, DistinctExpr) ||
+			 IsA(node, NullIfExpr))
+	{
+		OpExpr	   *opexpr = (OpExpr *) node;
+
+		/* rely on struct equivalence to treat these all alike */
+		set_opfuncid(opexpr);
+
+		result = get_func_cost(opexpr->opfuncid) * cpu_operator_cost;
+	}
+	else if (IsA(node, ScalarArrayOpExpr))
+	{
+		/*
+		 * Estimate that the operator will be applied to about half of the
+		 * array elements before the answer is determined.
+		 */
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+		Node	   *arraynode = (Node *) lsecond(saop->args);
+
+		set_sa_opfuncid(saop);
+		result = get_func_cost(saop->opfuncid) * cpu_operator_cost *
+			estimate_array_length(arraynode) * 0.5;
+	}
+	else if (IsA(node, CoerceViaIO))
+	{
+		CoerceViaIO *iocoerce = (CoerceViaIO *) node;
+		Oid			iofunc;
+		Oid			typioparam;
+		bool		typisvarlena;
+
+		/* check the result type's input function */
+		getTypeInputInfo(iocoerce->resulttype,
+						 &iofunc, &typioparam);
+		result = get_func_cost(iofunc) * cpu_operator_cost;
+		/* check the input type's output function */
+		getTypeOutputInfo(exprType((Node *) iocoerce->arg),
+						  &iofunc, &typisvarlena);
+		result += get_func_cost(iofunc) * cpu_operator_cost;
+	}
+	else if (IsA(node, ArrayCoerceExpr))
+	{
+		ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
+		Node	   *arraynode = (Node *) acoerce->arg;
+
+		if (OidIsValid(acoerce->elemfuncid))
+			result = get_func_cost(acoerce->elemfuncid) * cpu_operator_cost *
+				estimate_array_length(arraynode);
+		else
+			result = 0;
+	}
+	else if (IsA(node, RowCompareExpr))
+	{
+		/* Conservatively assume we will check all the columns */
+		RowCompareExpr *rcexpr = (RowCompareExpr *) node;
+		ListCell   *lc;
+
+		result = 0;
+		foreach(lc, rcexpr->opnos)
+		{
+			Oid			opid = lfirst_oid(lc);
+
+			result += get_func_cost(get_opcode(opid)) * cpu_operator_cost;
+		}
+	}
+	else
+	{
+		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
+	}
+
+	return result;
+}
+
 static bool
 cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 {
@@ -3547,32 +3665,27 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	 * moreover, since our rowcount estimates for functions tend to be pretty
 	 * phony, the results would also be pretty phony.
 	 */
-	if (IsA(node, FuncExpr))
+	if (IsA(node, FuncExpr) ||
+		IsA(node, OpExpr) ||
+		IsA(node, DistinctExpr) ||
+		IsA(node, NullIfExpr) ||
+		IsA(node, ScalarArrayOpExpr) ||
+		IsA(node, CoerceViaIO) ||
+		IsA(node, ArrayCoerceExpr) ||
+		IsA(node, RowCompareExpr))
 	{
-		context->total.per_tuple +=
-			get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
+		context->total.per_tuple += cost_eval_cacheable_expr_per_tuple(
+			(CacheableExpr *) node);
 	}
-	else if (IsA(node, OpExpr) ||
-			 IsA(node, DistinctExpr) ||
-			 IsA(node, NullIfExpr))
-	{
-		/* rely on struct equivalence to treat these all alike */
-		set_opfuncid((OpExpr *) node);
-		context->total.per_tuple +=
-			get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
-	}
-	else if (IsA(node, ScalarArrayOpExpr))
+	else if (IsA(node, CachedExpr))
 	{
 		/*
-		 * Estimate that the operator will be applied to about half of the
-		 * array elements before the answer is determined.
+		 * Calculate the subexpression's per-tuple cost as usual and add it to
+		 * the startup cost (because the subexpression will be executed only
+		 * once for all tuples).
 		 */
-		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
-		Node	   *arraynode = (Node *) lsecond(saop->args);
-
-		set_sa_opfuncid(saop);
-		context->total.per_tuple += get_func_cost(saop->opfuncid) *
-			cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
+		context->total.startup += cost_eval_cacheable_expr_per_tuple(
+			((CachedExpr *) node)->subexpr);
 	}
 	else if (IsA(node, Aggref) ||
 			 IsA(node, WindowFunc))
@@ -3588,45 +3701,6 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 		 */
 		return false;			/* don't recurse into children */
 	}
-	else if (IsA(node, CoerceViaIO))
-	{
-		CoerceViaIO *iocoerce = (CoerceViaIO *) node;
-		Oid			iofunc;
-		Oid			typioparam;
-		bool		typisvarlena;
-
-		/* check the result type's input function */
-		getTypeInputInfo(iocoerce->resulttype,
-						 &iofunc, &typioparam);
-		context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
-		/* check the input type's output function */
-		getTypeOutputInfo(exprType((Node *) iocoerce->arg),
-						  &iofunc, &typisvarlena);
-		context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
-	}
-	else if (IsA(node, ArrayCoerceExpr))
-	{
-		ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
-		Node	   *arraynode = (Node *) acoerce->arg;
-
-		if (OidIsValid(acoerce->elemfuncid))
-			context->total.per_tuple += get_func_cost(acoerce->elemfuncid) *
-				cpu_operator_cost * estimate_array_length(arraynode);
-	}
-	else if (IsA(node, RowCompareExpr))
-	{
-		/* Conservatively assume we will check all the columns */
-		RowCompareExpr *rcexpr = (RowCompareExpr *) node;
-		ListCell   *lc;
-
-		foreach(lc, rcexpr->opnos)
-		{
-			Oid			opid = lfirst_oid(lc);
-
-			context->total.per_tuple += get_func_cost(get_opcode(opid)) *
-				cpu_operator_cost;
-		}
-	}
 	else if (IsA(node, CurrentOfExpr))
 	{
 		/* Report high cost to prevent selection of anything but TID scan */
-- 
1.9.1

