From 9790421e1c81114afd0902dc4f384f07de4ef7f5 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat.oss@gmail.com>
Date: Thu, 26 Sep 2024 12:24:15 +0530
Subject: [PATCH 3/6] Use RestrictInfo hash table for storing EC derived child
 RestrictInfos

RestrictInfos derived from an EquivalenceClass, including the child
RestrictInfos, are stored in its EquivalenceClass::ec_derives which
is a linked list. The linked list is scanned every time
create_join_clause() is called, which in turn is called multiple times
for a given partition. The loop thus contributes O(N^2) at run time
where N is number of partitions. When there are thousands of
partition, the loops contributes significantly to the planning time.

In order to reduce the time complexity of searching a clause, store the
child clauses in a hash table, so that contribution of this loop in
overall planning time reduces to O(N). Please note that the parent
derived clauses are still stored in EquivalenceClass::ec_derives.

The hash table approach differs from the linked list based approach in
the following aspects:

1. The linked lists are per EquivalenceClass whereas we use a single
   hash table for all the child clauses. A hash table consumes more
   memory compared to the linked list, thus having one hash table per
   EquivalenceClass causes more memory to be consumed. But hash tables
   can efficiently handle thousands of entries hence one hash table to
   store all the child derived clauses from all the EquivalenceClasses
   suffices.

2. EquivalenceClass::ec_derives is searched by
   EquivalenceClass::left_em/right_em pointers whereas the hash table is
   key'ed by RestrictInfo::rinfo_serial and
   RestrictInfo::required_relids. In the future (next commits) the hash
   table will be used for storing the child RestrictInfos produced by
   translating the corresponding parent RestrictInfos which may not be
   part of EquivalenceClasses. Such RestrictInfos do not have
   EquivalenceMembers associated with them and hence can not be used as
   a key. OTOH, RestrictInfo::rinfo_serial and
   RestrictInfo::required_relids is set in all RestrictInfos.

   To cope with this, create_join_clause() searches for the parent
   clause in EquivalenceClass::ec_sources or
   EquivalenceClass::ec_derives before searching for the child clause in
   the hash table. That's some extra work compared to linked list based
   approach where a child clause could be searched directly in
   EquivalenceClass::ec_derives. But with this change, the linked lists
   should be short and thus searching for a parent clause should not
   take much time.

Please note that EquivalenceClass:ec_derives is used in functions other
than create_join_clause(). But all those usages happen before any child
EquivalenceMembers are added. This commit adds Asserts in those places
or leverages existing Asserts to validate this fact. Also the Asserts
will help in noticing any change in case the assumption is broken in the
future.  Hence moving child derived clauses out of EquivalenceClass does
not need any changes to those functions.

Ashutosh Bapat
---
 src/backend/optimizer/path/equivclass.c   | 121 +++++++++++++++++-----
 src/backend/optimizer/plan/analyzejoins.c |   9 ++
 src/include/nodes/pathnodes.h             |  10 +-
 3 files changed, 115 insertions(+), 25 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 8f6f005ecb9..333b1cf5c18 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -27,6 +27,7 @@
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "rewrite/rewriteManip.h"
@@ -269,7 +270,12 @@ process_equivalence(PlannerInfo *root,
 		{
 			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
 
-			Assert(!cur_em->em_is_child);	/* no children yet */
+			/*
+			 * No children yet and hence no child derived clauses to process.
+			 * Child derived clauses are stored outside an EquivalenceClass,
+			 * but we don't need to worry about them here.
+			 */
+			Assert(!cur_em->em_is_child);
 
 			/*
 			 * Match constants only within the same JoinDomain (see
@@ -1166,7 +1172,12 @@ generate_base_implied_equalities_const(PlannerInfo *root,
 		Oid			eq_op;
 		RestrictInfo *rinfo;
 
-		Assert(!cur_em->em_is_child);	/* no children yet */
+		/*
+		 * No children yet, and hence no child derived clauses. Child derived
+		 * clauses are stored outside EquivalenceClass but we don't need to
+		 * worry about those in this function.
+		 */
+		Assert(!cur_em->em_is_child);
 		if (cur_em == const_em)
 			continue;
 		eq_op = select_equality_operator(ec,
@@ -1829,6 +1840,53 @@ create_join_clause(PlannerInfo *root,
 	RestrictInfo *parent_rinfo = NULL;
 	ListCell   *lc;
 	MemoryContext oldcontext;
+	Relids		qualscope;
+
+	/*
+	 * The Relids computed here may be set in the RestrictInfo created in this
+	 * function. Hence allocate the memory for them in planner context where
+	 * the memory for RestrictInfo also gets allocated.
+	 */
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+	qualscope = bms_union(leftem->em_relids, rightem->em_relids);
+	MemoryContextSwitchTo(oldcontext);
+
+	/*
+	 * If either EM is a child, recursively create the corresponding
+	 * parent-to-parent clause, so that we can duplicate its rinfo_serial and
+	 * fetch the child clause if it already exists.
+	 */
+	if (leftem->em_is_child || rightem->em_is_child)
+	{
+		EquivalenceMember *leftp = leftem->em_parent ? leftem->em_parent : leftem;
+		EquivalenceMember *rightp = rightem->em_parent ? rightem->em_parent : rightem;
+
+		parent_rinfo = create_join_clause(root, ec, opno,
+										  leftp, rightp,
+										  parent_ec);
+
+		rinfo = find_restrictinfo(root, parent_rinfo->rinfo_serial, qualscope);
+		if (rinfo)
+		{
+			/*
+			 * Make sure that we got the child clause made from given
+			 * equivalence members which are part of the given equivalence
+			 * class.
+			 */
+			Assert((rinfo->left_em == leftem && rinfo->right_em == rightem) ||
+				   (rinfo->left_em == rightem && rinfo->right_em == leftem));
+			Assert(rinfo->parent_ec == parent_ec);
+			Assert(rinfo->left_ec == ec && rinfo->right_ec == ec);
+
+			/*
+			 * qualscope was just used for lookup and is not required anymore.
+			 * Free it to avoid accumulating memory in case the query involves
+			 * thousands of partitions.
+			 */
+			bms_free(qualscope);
+			return rinfo;
+		}
+	}
 
 	/*
 	 * Search to see if we already built a RestrictInfo for this pair of
@@ -1871,27 +1929,12 @@ create_join_clause(PlannerInfo *root,
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	/*
-	 * If either EM is a child, recursively create the corresponding
-	 * parent-to-parent clause, so that we can duplicate its rinfo_serial.
-	 */
-	if (leftem->em_is_child || rightem->em_is_child)
-	{
-		EquivalenceMember *leftp = leftem->em_parent ? leftem->em_parent : leftem;
-		EquivalenceMember *rightp = rightem->em_parent ? rightem->em_parent : rightem;
-
-		parent_rinfo = create_join_clause(root, ec, opno,
-										  leftp, rightp,
-										  parent_ec);
-	}
-
 	rinfo = build_implied_join_equality(root,
 										opno,
 										ec->ec_collation,
 										leftem->em_expr,
 										rightem->em_expr,
-										bms_union(leftem->em_relids,
-												  rightem->em_relids),
+										qualscope,
 										ec->ec_min_security);
 
 	/*
@@ -1909,10 +1952,6 @@ create_join_clause(PlannerInfo *root,
 		rinfo->clause_relids = bms_add_members(rinfo->clause_relids,
 											   rightem->em_relids);
 
-	/* If it's a child clause, copy the parent's rinfo_serial */
-	if (parent_rinfo)
-		rinfo->rinfo_serial = parent_rinfo->rinfo_serial;
-
 	/* Mark the clause as redundant, or not */
 	rinfo->parent_ec = parent_ec;
 
@@ -1926,11 +1965,35 @@ create_join_clause(PlannerInfo *root,
 	/* Mark it as usable with these EMs */
 	rinfo->left_em = leftem;
 	rinfo->right_em = rightem;
-	/* and save it for possible re-use */
-	ec->ec_derives = lappend(ec->ec_derives, rinfo);
+
+	/*
+	 * and save it for possible re-use. Parent clauses go into ec_derives but
+	 * child clauses are stored in a separate hash table separately for
+	 * performance reasons as explained in the EquivaleneClass prologue.
+	 */
+	if (!parent_rinfo)
+		ec->ec_derives = lappend(ec->ec_derives, rinfo);
 
 	MemoryContextSwitchTo(oldcontext);
 
+	if (parent_rinfo)
+	{
+		/* Copy parent's rinfo_serial to child RestrictInfo. */
+		rinfo->rinfo_serial = parent_rinfo->rinfo_serial;
+
+		/*
+		 * When merging EquivalencClasses we also merge ec_derives from. But
+		 * by the time we derive child clauses, EC merging should be complete
+		 * and hence it should be safe not to save child clauses in derived
+		 * list. If that's not the case we might miss merging child derived
+		 * clauses. Following Assert will trigger if things change.
+		 */
+		Assert(root->ec_merging_done);
+
+		/* Save child RestrictInfo for further re-use. */
+		add_restrictinfo(root, rinfo);
+	}
+
 	return rinfo;
 }
 
@@ -2659,6 +2722,16 @@ find_derived_clause_for_ec_member(EquivalenceClass *ec,
 
 	Assert(ec->ec_has_const);
 	Assert(!em->em_is_const);
+
+	/*
+	 * The child derived clauses are stored outside an EquivalenceClass. If
+	 * this function is passed a child EquivalenceMember, we have to find the
+	 * corresponding parent clause using the parent EquivalenceMember and then
+	 * find the child clause. But as of now this function does not receive
+	 * child EquivalenceMember's, so we don't worry about all that.  Following
+	 * Assert will let us know if things change.
+	 */
+	Assert(!em->em_is_child);
 	foreach(lc, ec->ec_derives)
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 928d926645e..76e4877dd63 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -659,6 +659,15 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
 			bms_is_member(ojrelid, cur_em->em_relids))
 		{
 			Assert(!cur_em->em_is_const);
+
+			/*
+			 * The relation removal happens before any children are expanded.
+			 * Hence we do not see child EquivalenceMembers in this function.
+			 * Hence we bon't need to worry about removing child derived
+			 * clauses which are stored outside EquivalenceClass. Assert below
+			 * would trigger if things change.
+			 */
+			Assert(!cur_em->em_is_child);
 			cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
 			cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
 			if (bms_is_empty(cur_em->em_relids))
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 8bcc8f11a57..58e0519bdc6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1377,6 +1377,14 @@ typedef struct JoinDomain
  * NB: if ec_merged isn't NULL, this class has been merged into another, and
  * should be ignored in favor of using the pointed-to class.
  *
+ * When there are hundreds of partitions or inheritance children involved, there
+ * can be thousands of derived child clauses all linked in ec_derives which is
+ * scanned linearly before creating a new clause in create_join_clause(). For
+ * faster lookup the child derived clauses are stored in a hash table located
+ * outside the equivalence class in PlannerInfo. We do not need one hash table
+ * per EquivalenceClass since, unlike a linked list, a hash tables can
+ * efficiently handle thousands of entries.
+ *
  * NB: EquivalenceClasses are never copied after creation.  Therefore,
  * copyObject() copies pointers to them as pointers, and equal() compares
  * pointers to EquivalenceClasses via pointer equality.  This is implemented
@@ -1393,7 +1401,7 @@ typedef struct EquivalenceClass
 	Oid			ec_collation;	/* collation, if datatypes are collatable */
 	List	   *ec_members;		/* list of EquivalenceMembers */
 	List	   *ec_sources;		/* list of generating RestrictInfos */
-	List	   *ec_derives;		/* list of derived RestrictInfos */
+	List	   *ec_derives;		/* list of derived parent RestrictInfos */
 	Relids		ec_relids;		/* all relids appearing in ec_members, except
 								 * for child members (see below) */
 	bool		ec_has_const;	/* any pseudoconstants in ec_members? */
-- 
2.34.1

