From 578be11f949fc30ef06b06743741754dc61feda9 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Sun, 13 Nov 2022 17:12:30 +0100
Subject: [PATCH v21 4/6] review comments

---
 src/backend/commands/trigger.c                |  2 +-
 src/backend/optimizer/README                  | 27 +++++++++++
 src/backend/optimizer/path/allpaths.c         | 47 ++++++++++++++-----
 src/backend/optimizer/path/costsize.c         |  1 +
 src/backend/optimizer/plan/initsplan.c        | 10 ++++
 src/backend/optimizer/plan/planner.c          | 18 ++++++-
 src/backend/optimizer/util/pathnode.c         | 45 ++++++++++++++++--
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/test/regress/expected/triggers.out        |  2 +-
 9 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 78fd67c242d..aaf54874be2 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -264,7 +264,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString,
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("\"%s\" is a partitioned table",
 								RelationGetRelationName(rel)),
-						 errdetail("Triggers on partitioned tables cannot have transition tables.")));
+						 errdetail("ROW triggers with transition tables are not supported on partitioned tables.")));
 		}
 	}
 	else if (rel->rd_rel->relkind == RELKIND_VIEW)
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 2fd1a962699..6f6b7d0b93b 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -1168,6 +1168,12 @@ input of Agg node. However, if the groups are large enough, it may be more
 efficient to apply the partial aggregation to the output of base relation
 scan, and finalize it when we have all relations of the query joined:
 
+XXX review: Hmm, do we need to push it all the way down to base relations? Or
+would it make sense to do the agg on an intermediate level? Say, we're joining
+three tables A, B and C. Maybe the agg could/should be evaluated on top of join
+A+B, before joining with C? Say, maybe the aggregate references columns from
+both base relations?
+
   EXPLAIN
   SELECT a.i, avg(b.y)
   FROM a JOIN b ON b.j = a.i
@@ -1186,15 +1192,27 @@ Thus the join above the partial aggregate node receives fewer input rows, and
 so the number of outer-to-inner pairs of tuples to be checked can be
 significantly lower, which can in turn lead to considerably lower join cost.
 
+XXX Perhaps mention this may also mean the partial ggregate could be pushed
+to a remote server with FDW partitions?
+
 Note that there's often no GROUP BY expression to be used for the partial
 aggregation, so we use equivalence classes to derive grouping expression: in
 the example above, the grouping key "b.j" was derived from "a.i".
 
+XXX I think this is slightly confusing - there is a GROUP BY expression for the
+partial aggregate, but as stated in the query it may not reference the side of
+a join explicitly.
+
 Also note that in this case the partial aggregate uses the "b.j" as grouping
 column although the column does not appear in the query target list. The point
 is that "b.j" is needed to evaluate the join condition, and there's no other
 way for the partial aggregate to emit its values.
 
+XXX Not sure I understand what this is trying to say. Firstly, maybe it'd be
+helpful to show targetlists in the EXPLAIN, i.e. do it as VERBOSE. But more
+importantly, isn't this a direct consequence of the equivalence classes stuff
+mentioned in the preceding paragraph?
+
 Besides base relation, the aggregation can also be pushed down to join:
 
   EXPLAIN
@@ -1217,6 +1235,10 @@ Besides base relation, the aggregation can also be pushed down to join:
 	  ->  Hash
 		->  Seq Scan on a
 
+XXX Aha, so this is pretty-much an answer to my earlier comment, and matches
+my example with three tables. Maybe this suggests the initial reference to
+base relations is a bit confusing.
+
 Whether the Agg node is created out of base relation or out of join, it's
 added to a separate RelOptInfo that we call "grouped relation". Grouped
 relation can be joined to a non-grouped relation, which results in a grouped
@@ -1227,3 +1249,8 @@ If query_planner produces a grouped relation that contains valid paths, these
 are simply added to the UPPERREL_PARTIAL_GROUP_AGG relation. Further
 processing of these paths then does not differ from processing of other
 partially grouped paths.
+
+XXX I think this is a good explanation of the motivation for this patch, but
+maybe it'd be good to go into more details about how we decide if it's correct
+to actually do the pushdown, data structures etc. Similar to earlier parts of
+this README.
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f00f900ff41..6d2c2f4fc36 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -196,9 +196,10 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	/*
 	 * Now that the sizes are known, we can estimate the sizes of the grouped
 	 * relations.
+	 *
+	 * XXX Seems more consistent with code nearby.
 	 */
-	if (root->grouped_var_list)
-		setup_base_grouped_rels(root);
+	setup_base_grouped_rels(root);
 
 	/*
 	 * We should now have size estimates for every actual table involved in
@@ -341,7 +342,7 @@ set_base_rel_sizes(PlannerInfo *root)
 }
 
 /*
- * setup_based_grouped_rels
+ * setup_base_grouped_rels
  *	  For each "plain" relation build a grouped relation if aggregate pushdown
  *    is possible and if this relation is suitable for partial aggregation.
  */
@@ -350,6 +351,10 @@ setup_base_grouped_rels(PlannerInfo *root)
 {
 	Index		rti;
 
+	/* If there are no grouped relations, estimate their sizes. */
+	if (!root->grouped_var_list)
+		return;
+
 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
 	{
 		RelOptInfo *brel = root->simple_rel_array[rti];
@@ -370,15 +375,19 @@ setup_base_grouped_rels(PlannerInfo *root)
 			continue;
 
 		/* ignore RTEs that are "other rels" */
+		/* XXX Shouldn't this check be earlier? Seems cheaper than the check
+		 * calling bms_nonempty_difference, for example. */
 		if (brel->reloptkind != RELOPT_BASEREL)
 			continue;
 
 		rel_grouped = build_simple_grouped_rel(root, brel->relid, &agg_info);
-		if (rel_grouped)
-		{
-			/* Make the relation available for joining. */
-			add_grouped_rel(root, rel_grouped, agg_info);
-		}
+
+		/* XXX When does this happen? */
+		if (!rel_grouped)
+			continue;
+
+		/* Make the relation available for joining. */
+		add_grouped_rel(root, rel_grouped, agg_info);
 	}
 }
 
@@ -560,6 +569,8 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					/* Plain relation */
 					set_plain_rel_pathlist(root, rel, rte);
 
+					/* XXX Shouldn't this really be part of set_plain_rel_pathlist? */
+
 					/* Add paths to the grouped relation if one exists. */
 					rel_grouped = find_grouped_rel(root, rel->relids,
 												   &agg_info);
@@ -3382,6 +3393,11 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped,
 
 /*
  * Apply partial aggregation to a subpath and add the AggPath to the pathlist.
+ *
+ * XXX I think this is potentially quite confusing, because the existing "add"
+ * functions add_path and add_partial_path only check if the proposed path is
+ * dominated by an existing path, pathkeys, etc. But this does more than that,
+ * perhaps even constructing new path etc.
  */
 static void
 add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
@@ -3389,7 +3405,6 @@ add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 {
 	Path	   *agg_path;
 
-
 	if (aggstrategy == AGG_HASHED)
 		agg_path = (Path *) create_agg_hashed_path(root, rel, subpath,
 												   agg_info);
@@ -3399,9 +3414,16 @@ add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	else
 		elog(ERROR, "unexpected strategy %d", aggstrategy);
 
+	/*
+	 * Bail out if we failed to create a suitable aggregated path. This can
+	 * happen e.g. then the path does not support hashing (for AGG_HASHED),
+	 * or when the input path is not sorted.
+	 */
+	if (agg_path == NULL)
+		return;
+
 	/* Add the grouped path to the list of grouped base paths. */
-	if (agg_path != NULL)
-		add_path(rel, (Path *) agg_path);
+	add_path(rel, (Path *) agg_path);
 }
 
 /*
@@ -3545,7 +3567,6 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 
 	for (lev = 2; lev <= levels_needed; lev++)
 	{
-		RelOptInfo *rel_grouped;
 		ListCell   *lc;
 
 		/*
@@ -3567,6 +3588,8 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 		 */
 		foreach(lc, root->join_rel_level[lev])
 		{
+			RelOptInfo *rel_grouped;
+
 			rel = (RelOptInfo *) lfirst(lc);
 
 			/* Create paths for partitionwise joins. */
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b34ad90d080..4688f561f0f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -4999,6 +4999,7 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
 							   0,
 							   JOIN_INNER,
 							   NULL);
+
 	rel->rows = clamp_row_est(nrows);
 
 	cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 8e913c92d8b..d7a9de9645e 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -278,6 +278,8 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
  * each possible grouping expression.
  *
  * root->group_pathkeys must be setup before this function is called.
+ *
+ * XXX Perhaps this should check/reject hasWindowFuncs too?
  */
 extern void
 setup_aggregate_pushdown(PlannerInfo *root)
@@ -311,6 +313,12 @@ setup_aggregate_pushdown(PlannerInfo *root)
 	if (root->parse->hasTargetSRFs)
 		return;
 
+	/*
+	 * XXX Maybe it'd be better to move create_aggregate_grouped_var_infos and
+	 * create_grouping_expr_grouped_var_infos to a function returning bool, and
+	 * only check that here.
+	 */
+
 	/* Create GroupedVarInfo per (distinct) aggregate. */
 	create_aggregate_grouped_var_infos(root);
 
@@ -329,6 +337,8 @@ setup_aggregate_pushdown(PlannerInfo *root)
 	 * Now that we know that grouping can be pushed down, search for the
 	 * maximum sortgroupref. The base relations may need it if extra grouping
 	 * expressions get added to them.
+	 *
+	 * XXX Shouldn't we do that only when adding extra grouping expressions?
 	 */
 	Assert(root->max_sortgroupref == 0);
 	foreach(lc, root->processed_tlist)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 0ada3ba3ebe..2f4db69c1f9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3899,6 +3899,10 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	/*
 	 * The non-partial paths can come either from the Gather above or from
 	 * aggregate push-down.
+	 *
+	 * XXX I can't quite convince myself this is correct. How come it's fine
+	 * to check pathlist and then call set_cheapest() on partially_grouped_rel?
+	 * Maybe it's correct and the comment merely needs to explain this.
 	 */
 	if (partially_grouped_rel && partially_grouped_rel->pathlist)
 		set_cheapest(partially_grouped_rel);
@@ -6847,6 +6851,12 @@ create_partial_grouping_paths(PlannerInfo *root,
 	 * push-down.
 	 */
 	partially_grouped_rel = find_grouped_rel(root, input_rel->relids, NULL);
+
+	/*
+	 * If the relation already exists, it must have been created by aggregate
+	 * pushdown. We can't check how exactly it got created, but we can at least
+	 * check that aggregate pushdown is enabled.
+	 */
 	Assert(enable_agg_pushdown || partially_grouped_rel == NULL);
 
 	/*
@@ -6872,6 +6882,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 	 * If we can't partially aggregate partial paths, and we can't partially
 	 * aggregate non-partial paths, then don't bother creating the new
 	 * RelOptInfo at all, unless the caller specified force_rel_creation.
+	 *
+	 * XXX Not sure why we're checking the partially_grouped_rel here?
 	 */
 	if (cheapest_total_path == NULL &&
 		cheapest_partial_path == NULL &&
@@ -6881,7 +6893,9 @@ create_partial_grouping_paths(PlannerInfo *root,
 
 	/*
 	 * Build a new upper relation to represent the result of partially
-	 * aggregating the rows from the input relation.
+	 * aggregating the rows from the input relation. The relation may
+	 * already exist due to aggregate pushdown, in which case we don't
+	 * need to create it.
 	 */
 	if (partially_grouped_rel == NULL)
 		partially_grouped_rel = fetch_upper_rel(root,
@@ -6903,6 +6917,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 	 *
 	 * If the target was already created for the sake of aggregate push-down,
 	 * it should be compatible with what we'd create here.
+	 *
+	 * XXX Why is this checking reltarget->exprs? What does that mean? 
 	 */
 	if (partially_grouped_rel->reltarget->exprs == NIL)
 		partially_grouped_rel->reltarget =
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 7025ebf94be..395bd093d34 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3163,9 +3163,21 @@ create_agg_path(PlannerInfo *root,
 }
 
 /*
+ * create_agg_sorted_path
+ *		Creates a pathnode performing sorted aggregation/grouping
+ *
  * Apply AGG_SORTED aggregation path to subpath if it's suitably sorted.
  *
  * NULL is returned if sorting of subpath output is not suitable.
+ *
+ * XXX I'm a bit confused why we need this? We now have create_agg_path and also
+ * create_agg_sorted_path and create_agg_hashed_path.
+ *
+ * XXX This assumes the input path to be sorted in a suitable way, but for
+ * regular aggregation we check that separately and then perhaps add sort
+ * if needed (possibly incremental one). That is, we don't do such checks
+ * in create_agg_path. Shouldn't we do the same thing before calling this
+ * new functions?
  */
 AggPath *
 create_agg_sorted_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
@@ -3184,6 +3196,7 @@ create_agg_sorted_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	agg_exprs = agg_info->agg_exprs;
 	target = agg_info->target;
 
+	/* Bail out if the input path is not sorted at all. */
 	if (subpath->pathkeys == NIL)
 		return NULL;
 
@@ -3192,6 +3205,18 @@ create_agg_sorted_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 
 	/*
 	 * Find all query pathkeys that our relation does affect.
+	 *
+	 * XXX Not sure what "that our relation does affect" means? Also, we
+	 * are not looking at query_pathkeys but group_pathkeys, so that's a
+	 * bit confusing. Perhaps something like this would be better:
+	 *
+	 * Find all grouping pathkeys produced by the subpath.
+	 *
+	 * If we find a pathkey not available in the subpath, we skip to the
+	 * next one. That means we may not produce an ordering suitable for
+	 * sorted aggregation at upper level, but that's OK - the main benefit
+	 * is reducing cardinality, and we may add sort (perhaps incremental
+	 * one) later, or just do hashed aggregation.
 	 */
 	foreach(lc1, root->group_pathkeys)
 	{
@@ -3210,10 +3235,21 @@ create_agg_sorted_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		}
 	}
 
+	/* Bail out if the subquery has no pathkeys for the grouping. */
 	if (key_subset == NIL)
 		return NULL;
 
-	/* Check if AGG_SORTED is useful for the whole query.  */
+	/*
+	 * Check if AGG_SORTED is useful for the whole query.
+	 *
+	 * XXX So this means we require the group pathkeys matched to the
+	 * subpath have to be a prefix of subpath->pathkeys. Why is that
+	 * necessary? We'll reduce the cardinality, and in the worst case
+	 * we'll have to add a separate sort (full or incremental). Or we
+	 * could finalize using hashed aggregate.
+	 *
+	 * XXX Doesn't seem to change any regression tests when disabled.
+	 */
 	if (!pathkeys_contained_in(key_subset, subpath->pathkeys))
 		return NULL;
 
@@ -3231,7 +3267,7 @@ create_agg_sorted_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	result = create_agg_path(root, rel, subpath, target,
 							 AGG_SORTED, aggsplit,
 							 agg_info->group_clauses,
-							 NIL,
+							 NIL,	/* qual for HAVING clause */
 							 &agg_costs,
 							 dNumGroups);
 
@@ -3283,6 +3319,9 @@ create_agg_hashed_path(PlannerInfo *root, RelOptInfo *rel,
 													  &agg_costs,
 													  dNumGroups);
 
+		/*
+		 * XXX But we can spill to disk in hashagg now, no?
+		 */
 		if (hashaggtablesize < work_mem * 1024L)
 		{
 			/*
@@ -3294,7 +3333,7 @@ create_agg_hashed_path(PlannerInfo *root, RelOptInfo *rel,
 									 AGG_HASHED,
 									 aggsplit,
 									 agg_info->group_clauses,
-									 NIL,
+									 NIL,	/* qual for HAVING clause */
 									 &agg_costs,
 									 dNumGroups);
 		}
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 868d21c351e..6e87ada684b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -388,6 +388,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_agg_pushdown = on
 
 # - Planner Cost Constants -
 
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index 8b8eadd1812..6d80ab1a6d8 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -2046,7 +2046,7 @@ create trigger failed after update on parted_trig
   referencing old table as old_table
   for each row execute procedure trigger_nothing();
 ERROR:  "parted_trig" is a partitioned table
-DETAIL:  Triggers on partitioned tables cannot have transition tables.
+DETAIL:  ROW triggers with transition tables are not supported on partitioned tables.
 drop table parted_trig;
 --
 -- Verify trigger creation for partitioned tables, and drop behavior
-- 
2.38.1