Replace FunctionCall2Coll with FunctionCallInvoke

Started by Andy Fanalmost 2 years ago3 messages
#1Andy Fan
zhihuifan1213@163.com
2 attachment(s)

Hello,

During the review of using extended statistics to improve join estimates
[1]: /messages/by-id/c8c0ff31-3a8a-7562-bbd3-78b2ec65f16c@enterprisedb.com
existing code as well. To make the discussion easier, open this new
thread.

I don't know how to name the thread name, just use the patch 1 for the
naming.

commit d228d9734e70b4f43ad824d736fb1279d2aad5fc (HEAD -> misc_stats)
Author: yizhi.fzh <yizhi.fzh@alibaba-inc.com>
Date: Mon Apr 8 11:43:37 2024 +0800

Fast path for empty clauselist in clauselist_selectivity_ext

It should be common in the real life, for example:

SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t1.a > 3;

clauses == NIL at the scan level of t2.

This would also make some debug life happier.

commit e852ce631f9348d5d29c8a53090ee71f7253767c
Author: yizhi.fzh <yizhi.fzh@alibaba-inc.com>
Date: Mon Apr 8 11:13:57 2024 +0800

Improve FunctionCall2Coll with FunctionCallInvoke

If a FunctionCall2Coll is called multi times with the same FmgrInfo,
turning it into FunctionCallInvoke will be helpful since the later one
can use the shared FunctionCallInfo.

There are many other places like GITSTATE which have the similar
chances, but I'd see if such changes is necessary at the first
place.

[1]: /messages/by-id/c8c0ff31-3a8a-7562-bbd3-78b2ec65f16c@enterprisedb.com
/messages/by-id/c8c0ff31-3a8a-7562-bbd3-78b2ec65f16c@enterprisedb.com

--
Best Regards
Andy Fan

Attachments:

v1-0001-Improve-FunctionCall2Coll-with-FunctionCallInvoke.patchtext/x-diffDownload
From e852ce631f9348d5d29c8a53090ee71f7253767c Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Mon, 8 Apr 2024 11:13:57 +0800
Subject: [PATCH v1 1/2] Improve FunctionCall2Coll with FunctionCallInvoke

If a FunctionCall2Coll is called multi times with the same FmgrInfo,
turning it into FunctionCallInvoke will be helpful since the later one
can use the shared FunctionCallInfo.

There are many other places like GITSTATE which have the similar
chances, but I'd see if such changes is necessary at the first place.
---
 src/backend/utils/adt/selfuncs.c | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 35f8f306ee..8dfeba5444 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -196,7 +196,7 @@ static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 							   Oid sortop, Oid collation,
 							   Datum *min, Datum *max);
 static void get_stats_slot_range(AttStatsSlot *sslot,
-								 Oid opfuncoid, FmgrInfo *opproc,
+								 FunctionCallInfo fcinfo,
 								 Oid collation, int16 typLen, bool typByVal,
 								 Datum *min, Datum *max, bool *p_have_data);
 static bool get_actual_variable_range(PlannerInfo *root,
@@ -5905,6 +5905,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 	bool		typByVal;
 	Oid			opfuncoid;
 	FmgrInfo	opproc;
+	LOCAL_FCINFO(fcinfo, 2);
 	AttStatsSlot sslot;
 
 	/*
@@ -5936,8 +5937,6 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 									   (opfuncoid = get_opcode(sortop))))
 		return false;
 
-	opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
-
 	get_typlenbyval(vardata->atttype, &typLen, &typByVal);
 
 	/*
@@ -5957,6 +5956,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 		free_attstatsslot(&sslot);
 	}
 
+	fmgr_info(opfuncoid, &opproc);
+	InitFunctionCallInfoData(*fcinfo, &opproc, 2, collation, NULL, NULL);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].isnull = false;
+
 	/*
 	 * Otherwise, if there is a histogram with some other ordering, scan it
 	 * and get the min and max values according to the ordering we want.  This
@@ -5968,7 +5972,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
 						 ATTSTATSSLOT_VALUES))
 	{
-		get_stats_slot_range(&sslot, opfuncoid, &opproc,
+		get_stats_slot_range(&sslot, fcinfo,
 							 collation, typLen, typByVal,
 							 &tmin, &tmax, &have_data);
 		free_attstatsslot(&sslot);
@@ -6003,7 +6007,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
 		}
 
 		if (use_mcvs)
-			get_stats_slot_range(&sslot, opfuncoid, &opproc,
+			get_stats_slot_range(&sslot, fcinfo,
 								 collation, typLen, typByVal,
 								 &tmin, &tmax, &have_data);
 		free_attstatsslot(&sslot);
@@ -6021,7 +6025,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
  * to what we find in the statistics array.
  */
 static void
-get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
+get_stats_slot_range(AttStatsSlot *sslot, FunctionCallInfo fcinfo,
 					 Oid collation, int16 typLen, bool typByVal,
 					 Datum *min, Datum *max, bool *p_have_data)
 {
@@ -6031,10 +6035,6 @@ get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
 	bool		found_tmin = false;
 	bool		found_tmax = false;
 
-	/* Look up the comparison function, if we didn't already do so */
-	if (opproc->fn_oid != opfuncoid)
-		fmgr_info(opfuncoid, opproc);
-
 	/* Scan all the slot's values */
 	for (int i = 0; i < sslot->nvalues; i++)
 	{
@@ -6045,16 +6045,19 @@ get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
 			*p_have_data = have_data = true;
 			continue;
 		}
-		if (DatumGetBool(FunctionCall2Coll(opproc,
-										   collation,
-										   sslot->values[i], tmin)))
+
+		fcinfo->args[0].value = sslot->values[i];
+		fcinfo->args[1].value = tmin;
+		if (DatumGetBool(FunctionCallInvoke(fcinfo)))
 		{
 			tmin = sslot->values[i];
 			found_tmin = true;
 		}
-		if (DatumGetBool(FunctionCall2Coll(opproc,
-										   collation,
-										   tmax, sslot->values[i])))
+
+		fcinfo->args[0].value = tmax;
+		fcinfo->args[1].value = sslot->values[i];
+
+		if (DatumGetBool(FunctionCallInvoke(fcinfo)))
 		{
 			tmax = sslot->values[i];
 			found_tmax = true;
-- 
2.34.1

v1-0002-Fast-path-for-empty-clauselist-in-clauselist_sele.patchtext/x-diffDownload
From d228d9734e70b4f43ad824d736fb1279d2aad5fc Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Mon, 8 Apr 2024 11:43:37 +0800
Subject: [PATCH v1 2/2] Fast path for empty clauselist in
 clauselist_selectivity_ext

It should be common in the real life, for example:

SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t1.a > 3;

clauses == NIL at the scan level of t2.

This would also make some debug life happier.
---
 src/backend/optimizer/path/clausesel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 0ab021c1e8..dff498d581 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -128,6 +128,9 @@ clauselist_selectivity_ext(PlannerInfo *root,
 	ListCell   *l;
 	int			listidx;
 
+	if (list_length(clauses) == 0)
+		return 1.0;
+
 	/*
 	 * If there's exactly one clause, just go directly to
 	 * clause_selectivity_ext(). None of what we might do below is relevant.
-- 
2.34.1

#2Michael Paquier
michael@paquier.xyz
In reply to: Andy Fan (#1)
Re: Replace FunctionCall2Coll with FunctionCallInvoke

On Mon, Apr 08, 2024 at 12:25:00PM +0800, Andy Fan wrote:

During the review of using extended statistics to improve join estimates
[1], I found some code level optimization opportunities which apply to
existing code as well. To make the discussion easier, open this new
thread.

Is that measurable?
--
Michael

#3Andy Fan
zhihuifan1213@163.com
In reply to: Michael Paquier (#2)
Re: Replace FunctionCall2Coll with FunctionCallInvoke

Hello Michael,

[[PGP Signed Part:Undecided]]
On Mon, Apr 08, 2024 at 12:25:00PM +0800, Andy Fan wrote:

During the review of using extended statistics to improve join estimates
[1], I found some code level optimization opportunities which apply to
existing code as well. To make the discussion easier, open this new
thread.

Is that measurable?

I didn't spent time on testing it. Compared with if the improvement is
measureable, I'm more interested with if it is better than before or
not. As for measurable respect, I'm with the idea at [1]/messages/by-id/CA+TgmoZJ0a_Dcn+ST4YSeSrLnnmajmcsi7ZvEpgkKNiF0SwBuw@mail.gmail.com. Do you think
the measurable matter?

[1]: /messages/by-id/CA+TgmoZJ0a_Dcn+ST4YSeSrLnnmajmcsi7ZvEpgkKNiF0SwBuw@mail.gmail.com
/messages/by-id/CA+TgmoZJ0a_Dcn+ST4YSeSrLnnmajmcsi7ZvEpgkKNiF0SwBuw@mail.gmail.com

--
Best Regards
Andy Fan