From 47efe758434ba8a58de50b0124853de29f48f6b4 Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Thu, 18 Apr 2024 16:40:36 +0200
Subject: [PATCH 4/5] Teach the planner to pass UniqueKey nodes from a subquery
 to the parent query.

So far it was only possible to recognize uniqueness of the output of
relatively simple subqueries. With this feature, more complex subqueries can
be checked, even those "hidden" in a view with security_barrier enabled. Thus
there should be more opportunities to apply specific optimizations. Typically,
when the inner relation of join is unique, then INNER join can be replaced
with SEMI join.
---
 src/backend/optimizer/path/allpaths.c     |  17 +
 src/backend/optimizer/path/equivclass.c   |   5 +-
 src/backend/optimizer/path/indxpath.c     |   6 +-
 src/backend/optimizer/path/pathkeys.c     |   6 +-
 src/backend/optimizer/path/uniquekey.c    | 487 ++++++++++++++++++++--
 src/backend/optimizer/plan/analyzejoins.c |  46 ++
 src/backend/optimizer/plan/planner.c      |  84 +++-
 src/include/nodes/pathnodes.h             |  26 +-
 src/include/optimizer/paths.h             |  10 +
 src/test/regress/expected/join.out        |  66 +--
 src/test/regress/expected/subselect.out   |  24 ++
 src/test/regress/sql/join.sql             |  13 +-
 src/test/regress/sql/subselect.sql        |  13 +
 13 files changed, 729 insertions(+), 74 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index ced0fce415..faedc4a247 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2667,6 +2667,23 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		return;
 	}
 
+	/*
+	 * Translate the subquery unique keys so that 'rel' can use them.
+	 *
+	 * The final relation is not guaranteed to have the target set, so
+	 * retrieve it from ->upper_targets.
+	 *
+	 * Unique keys are currently not supported for RELOPT_OTHER_MEMBER_REL.
+	 */
+	if (rel->reloptkind == RELOPT_BASEREL)
+	{
+		PathTarget	*sub_final_target;
+
+		sub_final_target = rel->subroot->upper_targets[UPPERREL_FINAL];
+		convert_unique_keys_for_rel(root, rel, NULL, sub_final_rel,
+									sub_final_target);
+	}
+
 	/*
 	 * Mark rel with estimated output rows, width, etc.  Note that we have to
 	 * do this before generating outer-query paths, else cost_subqueryscan is
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 1d6bedb399..95d4722b41 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -591,9 +591,9 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 						 Oid collation,
 						 Index sortref,
 						 Relids rel,
+						 JoinDomain *jdomain,
 						 bool create_it)
 {
-	JoinDomain *jdomain;
 	Relids		expr_relids;
 	EquivalenceClass *newec;
 	EquivalenceMember *newem;
@@ -609,7 +609,8 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 	 * Since SortGroupClause nodes are top-level expressions (GROUP BY, ORDER
 	 * BY, etc), they can be presumed to belong to the top JoinDomain.
 	 */
-	jdomain = linitial_node(JoinDomain, root->join_domains);
+	if (jdomain == NULL)
+		jdomain = linitial_node(JoinDomain, root->join_domains);
 
 	/*
 	 * Scan through the existing EquivalenceClasses for a match
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2230b13104..87081627ac 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -151,7 +151,6 @@ static IndexClause *match_clause_to_indexcol(PlannerInfo *root,
 											 RestrictInfo *rinfo,
 											 int indexcol,
 											 IndexOptInfo *index);
-static bool IsBooleanOpfamily(Oid opfamily);
 static IndexClause *match_boolean_index_clause(PlannerInfo *root,
 											   RestrictInfo *rinfo,
 											   int indexcol, IndexOptInfo *index);
@@ -2275,8 +2274,11 @@ match_clause_to_indexcol(PlannerInfo *root,
  * If the opfamily OID is in the range of built-in objects, we can rely
  * on hard-wired knowledge of which built-in opfamilies support this.
  * For extension opfamilies, there's no choice but to do a catcache lookup.
+ *
+ * TODO If the call from equivclass.c remains, move this function to better
+ * place.
  */
-static bool
+bool
 IsBooleanOpfamily(Oid opfamily)
 {
 	if (opfamily < FirstNormalObjectId)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 1d61881a6b..9e69085b5d 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -233,7 +233,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
 	/* Now find or (optionally) create a matching EquivalenceClass */
 	eclass = get_eclass_for_sort_expr(root, expr,
 									  opfamilies, opcintype, collation,
-									  sortref, rel, create_it);
+									  sortref, rel, NULL, create_it);
 
 	/* Fail if no EC and !create_it */
 	if (!eclass)
@@ -1122,6 +1122,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
 											 sub_eclass->ec_collation,
 											 0,
 											 rel->relids,
+											 NULL,
 											 false);
 
 				/*
@@ -1203,6 +1204,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
 														sub_expr_coll,
 														0,
 														rel->relids,
+														NULL,
 														false);
 
 					/*
@@ -1467,6 +1469,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
 								 ((OpExpr *) clause)->inputcollid,
 								 0,
 								 NULL,
+								 NULL,
 								 true);
 	restrictinfo->right_ec =
 		get_eclass_for_sort_expr(root,
@@ -1476,6 +1479,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
 								 ((OpExpr *) clause)->inputcollid,
 								 0,
 								 NULL,
+								 NULL,
 								 true);
 }
 
diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
index 0b65ff135f..4200663b82 100644
--- a/src/backend/optimizer/path/uniquekey.c
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -19,7 +19,20 @@
 #include "nodes/pathnodes.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/paths.h"
+#include "utils/lsyscache.h"
 
+/*
+ * Information on a column of an unique index for which we are trying to
+ * create an unique key.
+ */
+typedef struct UniqueKeyExpr
+{
+	/* The expression itself. */
+	Expr	*expr;
+
+	/* Operator family to find / create suitable equivalence class. */
+	Oid		opfamily;
+} UniqueKeyExpr;
 
 /* Functions to populate UniqueKey */
 static bool add_uniquekey_for_uniqueindex(PlannerInfo *root,
@@ -29,23 +42,24 @@ static bool add_uniquekey_for_uniqueindex(PlannerInfo *root,
 static Bitmapset *build_ec_positions_for_exprs(PlannerInfo *root, List *exprs,
 											   RelOptInfo *rel);
 static int find_ec_position_matching_expr(PlannerInfo *root,
-										  Expr *expr,
-										  RelOptInfo *baserel);
+										  RelOptInfo *baserel,
+										  Expr *expr, List *opfamilies);
 static bool unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes);
 
 /* Helper functions to create UniqueKey. */
-static UniqueKey * make_uniquekey(Bitmapset *eclass_indexes,
+static UniqueKey * make_uniquekey(Bitmapset *item_indexes,
+								  List *opfamily_lists,
 								  bool useful_for_distinct);
 static void mark_rel_singlerow(RelOptInfo *rel, int relid);
 
 static UniqueKey * rel_singlerow_uniquekey(RelOptInfo *rel);
-static bool uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey);
 
 /* Debug only */
 static void print_uniquekey(PlannerInfo *root, RelOptInfo *rel);
 
 static bool uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relids relids);
 static bool is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *joinrel);
+static int find_expr_pos_in_list(Expr *expr, List *list);
 
 /*
  * populate_baserel_uniquekeys
@@ -61,6 +75,9 @@ populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel)
 	List	   *truncatable_exprs = NIL,
 			   *expr_opfamilies = NIL;
 
+	if (rel->reloptkind != RELOPT_BASEREL)
+		return;
+
 	/*
 	 * Currently we only use UniqueKey for mark-distinct-as-noop case, so if
 	 * there is no-distinct-clause at all, we can ignore the maintenance at
@@ -132,7 +149,8 @@ add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
 	ListCell   *indexpr_item;
 	RelOptInfo *rel = unique_index->rel;
 	bool		used_for_distinct;
-	int			c;
+	int			c, i;
+	List		*opfamily_lists = NIL;
 
 	indexpr_item = list_head(unique_index->indexprs);
 
@@ -143,6 +161,7 @@ add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
 		bool		matched_const = false;
 		ListCell   *lc1,
 				   *lc2;
+		UniqueKeyExpr	*uexpr;
 
 		if (attr > 0)
 		{
@@ -174,7 +193,10 @@ add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
 		if (matched_const)
 			continue;
 
-		unique_exprs = lappend(unique_exprs, expr);
+		uexpr = palloc_object(UniqueKeyExpr);
+		uexpr->expr = expr;
+		uexpr->opfamily = unique_index->opfamily[c];
+		unique_exprs = lappend(unique_exprs, uexpr);
 	}
 
 	if (unique_exprs == NIL)
@@ -193,35 +215,94 @@ add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
 	if (unique_ecs == NULL)
 		return false;
 
+	/* Collect the operator families. */
+	i = -1;
+	while ((i = bms_next_member(unique_ecs, i)) >= 0)
+	{
+		EquivalenceClass	*ec = list_nth(root->eq_classes, i);
+
+		opfamily_lists = lappend(opfamily_lists, ec->ec_opfamilies);
+	}
+
 	used_for_distinct = unique_ecs_useful_for_distinct(root, unique_ecs);
 
 
 	rel->uniquekeys = lappend(rel->uniquekeys,
-							  make_uniquekey(unique_ecs,
+							  make_uniquekey(unique_ecs, opfamily_lists,
 											 used_for_distinct));
 	return false;
 }
 
 /*
  * find_ec_position_matching_expr
- *		Locate the position of EquivalenceClass whose members matching
- *		the given expr, if any; return -1 if no match.
+ *		Locate the position of EquivalenceClass whose members matching the
+ *		given expr. Try to create EC if suitable one does not exist. Return -1
+ *		if there is not enough information in the catalog about the data type
+ *		to create the EC.
  */
 static int
-find_ec_position_matching_expr(PlannerInfo *root,
-							   Expr *expr,
-							   RelOptInfo *baserel)
+find_ec_position_matching_expr(PlannerInfo *root, RelOptInfo *baserel,
+							   Expr *expr, List *opfamilies)
 {
 	int			i = -1;
+	EquivalenceClass *ec;
+	int		ec_index;
+	JoinDomain *jdomain;
+	ListCell	*lc;
+
+	/*
+	 * XXX Currently the function is only used to build unique keys, so we
+	 * don't create ECs for boolean columns: normal EC processing does not
+	 * create them as well and build_index_pathkeys() considers boolean
+	 * pathkeys redundant, so missing boolean EC is o.k. However, if we
+	 * created the boolean ECs, build_index_pathkeys() would return the
+	 * corresponding pathkeys, and those could mismatch query_pathkeys.
+	 * Shouldn't this be handled in pathkeys_useful_for_ordering()?
+	 */
+	foreach(lc, opfamilies)
+	{
+		/* XXX The result should be the same for each item of the list. */
+		if (IsBooleanOpfamily(lfirst_oid(lc)))
+			return -1;
+	}
 
 	while ((i = bms_next_member(baserel->eclass_indexes, i)) >= 0)
 	{
-		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+		List	*diff;
+
+		ec = list_nth(root->eq_classes, i);
+
+		/*
+		 * The EC must understand equality in the same way as the unique index
+		 * that guarantees the uniqueness. That is, ec_opfamilies must contain
+		 * all the opfamilies passed by the caller.
+		 */
+		diff = list_difference_oid(opfamilies, ec->ec_opfamilies);
+		if (list_length(diff) > 0)
+			continue;
 
 		if (find_ec_member_matching_expr(ec, expr, baserel->relids))
 			return i;
 	}
-	return -1;
+
+	/* Create the EC. */
+	jdomain = makeNode(JoinDomain);
+	jdomain->jd_relids = pull_varnos(root, (Node *) expr);
+	ec = get_eclass_for_sort_expr(root, expr,
+								  opfamilies,
+								  exprType((Node *) expr),
+								  exprCollation((Node *) expr),
+								  0,
+								  NULL,
+								  jdomain,
+								  true);
+	ec_index = list_length(root->eq_classes) - 1;
+
+	/* The new EC should now be known to both 'root' and 'baserel'. */
+	Assert(ec == llast(root->eq_classes));
+	Assert(bms_is_member(ec_index, baserel->eclass_indexes));
+
+	return ec_index;
 }
 
 /*
@@ -238,7 +319,10 @@ build_ec_positions_for_exprs(PlannerInfo *root, List *exprs, RelOptInfo *rel)
 
 	foreach(lc, exprs)
 	{
-		int			pos = find_ec_position_matching_expr(root, lfirst(lc), rel);
+		UniqueKeyExpr	*uexpr = (UniqueKeyExpr *) lfirst(lc);
+		int			pos = find_ec_position_matching_expr(root, rel,
+														 uexpr->expr,
+														 list_make1_oid(uexpr->opfamily));
 
 		if (pos < 0)
 		{
@@ -248,20 +332,23 @@ build_ec_positions_for_exprs(PlannerInfo *root, List *exprs, RelOptInfo *rel)
 		res = bms_add_member(res, pos);
 	}
 	return res;
+
 }
 
 /*
  *	make_uniquekey
  *		Based on UnqiueKey rules, it is impossible for a UnqiueKey
- * which have eclass_indexes and relid both set. This function just
- * handle eclass_indexes case.
+ * which have item_indexes and relid both set. This function just
+ * handle item_indexes case.
  */
 static UniqueKey *
-make_uniquekey(Bitmapset *eclass_indexes, bool useful_for_distinct)
+make_uniquekey(Bitmapset *item_indexes, List *opfamily_lists,
+			   bool useful_for_distinct)
 {
 	UniqueKey  *ukey = makeNode(UniqueKey);
 
-	ukey->eclass_indexes = eclass_indexes;
+	ukey->item_indexes = item_indexes;
+	ukey->opfamily_lists = opfamily_lists;
 	ukey->relid = 0;
 	ukey->use_for_distinct = useful_for_distinct;
 	return ukey;
@@ -321,7 +408,7 @@ print_uniquekey(PlannerInfo *root, RelOptInfo *rel)
 			UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
 
 			elog(INFO, "UNIQUEKEY{indexes=%s, singlerow_rels=%d, use_for_distinct=%d}",
-				 bmsToString(ukey->eclass_indexes),
+				 bmsToString(ukey->item_indexes),
 				 ukey->relid,
 				 ukey->use_for_distinct);
 		}
@@ -352,12 +439,12 @@ var_is_nullable(PlannerInfo *root, Var *var, RelOptInfo *rel)
  *
  *	Check if the uniquekey contains nulls values.
  */
-static bool
+bool
 uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey)
 {
 	int			i = -1;
 
-	while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0)
+	while ((i = bms_next_member(ukey->item_indexes, i)) >= 0)
 	{
 		EquivalenceClass *ec = list_nth_node(EquivalenceClass, root->eq_classes, i);
 		ListCell   *lc;
@@ -421,7 +508,7 @@ relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_path
 	{
 		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
 
-		if (bms_is_subset(ukey->eclass_indexes, pathkey_bm) &&
+		if (bms_is_subset(ukey->item_indexes, pathkey_bm) &&
 			!uniquekey_contains_multinulls(root, rel, ukey))
 			return true;
 	}
@@ -611,6 +698,10 @@ populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
 		foreach(lc2, innerrel->uniquekeys)
 		{
 			UniqueKey  *inner_ukey = lfirst(lc2);
+			Bitmapset	*indexes_new;
+			List	*opfamily_lists_new = NIL;
+			int		i;
+			ListCell	*lo, *li;
 
 			if (!is_uniquekey_useful_afterjoin(root, inner_ukey, joinrel))
 				continue;
@@ -618,9 +709,35 @@ populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
 			/* singlerow will make the outeruk_still_valid true */
 			Assert(!uniquekey_is_singlerow(inner_ukey));
 
+			/* Assemble opfamily_lists_new in the correct order.  */
+			i = -1;
+			indexes_new = bms_union(outer_ukey->item_indexes, inner_ukey->item_indexes);
+			lo = list_head(outer_ukey->opfamily_lists);
+			li = list_head(inner_ukey->opfamily_lists);
+			while ((i = bms_next_member(indexes_new, i)) >= 0)
+			{
+				List	*l;
+
+				if (bms_is_member(i, outer_ukey->item_indexes))
+				{
+					l = lfirst(lo);
+					lo = lnext(outer_ukey->opfamily_lists, lo);
+				}
+				else
+				{
+					Assert(bms_is_member(i, inner_ukey->item_indexes));
+
+					l = lfirst(li);
+					li = lnext(inner_ukey->opfamily_lists, li);
+				}
+
+				opfamily_lists_new = lappend(opfamily_lists_new, l);
+			}
+
 			joinrel->uniquekeys = lappend(joinrel->uniquekeys,
 										  make_uniquekey(
-														 bms_union(outer_ukey->eclass_indexes, inner_ukey->eclass_indexes),
+														 indexes_new,
+														 opfamily_lists_new,
 														 outer_ukey->use_for_distinct || inner_ukey->use_for_distinct));
 		}
 	}
@@ -640,7 +757,7 @@ uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relid
 		return bms_is_member(ukey->relid, relids);
 	}
 
-	return bms_is_subset(ukey->eclass_indexes, ecs);
+	return bms_is_subset(ukey->item_indexes, ecs);
 }
 
 
@@ -656,7 +773,7 @@ uniquekey_useful_for_merging(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *re
 
 	int			i = -1;
 
-	while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0)
+	while ((i = bms_next_member(ukey->item_indexes, i)) >= 0)
 	{
 		EquivalenceClass *ec = list_nth(root->eq_classes, i);
 		ListCell   *j;
@@ -709,3 +826,321 @@ is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *j
 
 	return uniquekey_useful_for_merging(root, ukey, joinrel);
 }
+
+
+/*
+ * convert_unique_keys_for_rel
+ *
+ * Convert unique keys of 'input_relation' and assign them to 'rel'. The
+ * comments of UniqueKey explain the difference in the format.
+ *
+ * This function assumes that at least one of the relations is "upper
+ * relation". If 'input_rel' is upper and 'rel' is base/join rel, pass NULL
+ * for 'target'.
+ */
+void
+convert_unique_keys_for_rel(PlannerInfo *root, RelOptInfo *rel,
+							PathTarget *target, RelOptInfo *input_rel,
+							PathTarget *input_target)
+{
+	ListCell	*lc1;
+
+	/* No unique keys, in case we return early. */
+	rel->uniquekeys = NIL;
+
+	/*
+	 * Set-returning functions can break uniqueness. This does not necessarily
+	 * affect all of the upper relations, but checking is not trivial and
+	 * should be done by the caller rather than by this function. Make the
+	 * logic simple for now.
+	 */
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	/* No keys to convert? */
+	if (input_rel->uniquekeys == NIL)
+		return;
+
+	/*
+	 * If both relations are upper and if they have the same target,
+	 * just copy the uniquekeys unmodified.
+	 */
+	if (IS_UPPER_REL(rel) && IS_UPPER_REL(input_rel) &&
+		target == input_target)
+	{
+		rel->uniquekeys = copyObject(input_rel->uniquekeys);
+		return;
+	}
+
+	foreach(lc1, input_rel->uniquekeys)
+	{
+		UniqueKey  *key = lfirst_node(UniqueKey, lc1);
+		Bitmapset	*item_indexes_new = NULL;
+		List	*opfamily_lists_new = NIL;
+		int		matched;
+		int		i = -1;
+		ListCell	*lc2;
+
+		if (!IS_UPPER_REL(input_rel))
+		{
+			/* Conversion between base/join rels is currently not needed. */
+			IS_UPPER_REL(rel);
+
+			/*
+			 * For each key, check its ECs and find the EM present in
+			 * 'target'.
+			 */
+			while ((i = bms_next_member(key->item_indexes, i)) >= 0)
+			{
+				EquivalenceClass *ec = list_nth(root->eq_classes, i);
+
+				matched = -1;
+				foreach(lc2, ec->ec_members)
+				{
+					EquivalenceMember *em;
+					Expr	*expr;
+
+					em = lfirst_node(EquivalenceMember, lc2);
+					expr = em->em_expr;
+
+					/* EMs can be wrapped in RelabelType. */
+					while (expr && IsA(expr, RelabelType))
+						expr = ((RelabelType *) expr)->arg;
+
+					/*
+					 * Currently we only accept Vars, for simplicity (and
+					 * because the unique key shouldn't contain other
+					 * nodes). In general, we look for expressions that map
+					 * unique input value to unique output value. Not sure
+					 * though how we can recognize them easily.
+					 */
+					if (!IsA(expr, Var))
+						continue;
+
+					matched = find_expr_pos_in_list(expr, target->exprs);
+					if (matched >= 0)
+						break;
+				}
+
+				if (matched >= 0)
+				{
+					item_indexes_new = bms_add_member(item_indexes_new,
+													  matched);
+					opfamily_lists_new = lappend(opfamily_lists_new,
+												 ec->ec_opfamilies);
+				}
+				else
+				{
+					/* The caller does not want an incomplete unique key. */
+					bms_free(item_indexes_new);
+					item_indexes_new = NULL;
+					list_free(opfamily_lists_new);
+					opfamily_lists_new = NIL;
+					break;
+				}
+			}
+		}
+		else
+		{
+			ListCell	*lc2 = list_head(key->opfamily_lists);
+
+			/* IS_UPPER_REL(input_rel) */
+
+			Assert(bms_num_members(key->item_indexes) ==
+				   list_length(key->opfamily_lists));
+
+			/*
+			 * For an upper relation, 'index_items' point to expressions of an
+			 * existing target. For each key, find its target expressions in
+			 * 'target' or (if target==NULL) in the members of the ECs 'rel'
+			 * belongs to.
+			 */
+			while ((i = bms_next_member(key->item_indexes, i)) >= 0)
+			{
+				Expr	*expr;
+
+				expr = (Expr *) list_nth(input_target->exprs, i);
+
+				if (target)
+				{
+					Assert(IS_UPPER_REL(rel));
+
+					matched = find_expr_pos_in_list(expr, target->exprs);
+				}
+				else
+				{
+					ListCell	*lc3;
+					Var		*target_var = NULL;
+
+					/*
+					 * 'rel' should be a base relation, not upper or join
+					 * relation.
+					 */
+					Assert(!IS_UPPER_REL(rel));
+					Assert(rel->relid > 0);
+
+					/*
+					 * Base relation should use plain Var nodes to reference
+					 * the subquery target. Find the Var in the base relation
+					 * target that points to this expression.
+					 */
+					foreach(lc3, rel->reltarget->exprs)
+					{
+						Var		*var;
+
+						/*
+						 * If a general expression happens to be there, give
+						 * up because it can break uniqueness of the relation
+						 * output.
+						 */
+						if (!IsA(lfirst(lc3), Var))
+							return;
+
+						var = lfirst_node(Var, lc3);
+
+						if (var->varno == rel->relid &&
+							var->varattno == (i + 1))
+						{
+							target_var = var;
+							break;
+						}
+					}
+					if (target_var)
+					{
+						List		*opfamilies = lfirst(lc2);
+
+						matched = find_ec_position_matching_expr(root, rel,
+																 (Expr *) target_var,
+																 opfamilies);
+						lc2 = lnext(key->opfamily_lists, lc2);
+					}
+					else
+						matched = -1;
+
+					if (matched >= 0)
+					{
+						/*
+						 * By generating the unique key the subquery
+						 * guarantees that the value of the output column
+						 * cannot be NULL.
+						 */
+						rel->notnullattrs = bms_add_member(rel->notnullattrs,
+														   target_var->varattno - FirstLowInvalidHeapAttributeNumber);
+					}
+					else
+						matched = -1;
+				}
+
+				if (matched >= 0)
+					item_indexes_new = bms_add_member(item_indexes_new,
+													  matched);
+				else
+				{
+					/* The caller does not want an incomplete unique key. */
+					bms_free(item_indexes_new);
+					item_indexes_new = NULL;
+					break;
+				}
+			}
+		}
+
+		if (item_indexes_new)
+		{
+			UniqueKey	*key_new;
+
+			if (!IS_UPPER_REL(input_rel))
+				Assert(opfamily_lists_new);
+			else
+			{
+				Assert(opfamily_lists_new == NIL);
+
+				opfamily_lists_new = copyObject(key->opfamily_lists);
+			}
+
+
+			key_new = make_uniquekey(item_indexes_new, opfamily_lists_new,
+									 true);
+			rel->uniquekeys = lappend(rel->uniquekeys, key_new);
+		}
+	}
+}
+
+List *
+create_uniquekeys_for_sortop(SetOperationStmt *topop, List *targetlist)
+{
+	ListCell	*l, *lg;
+	int		i;
+	Bitmapset	*key_idxs = NULL;
+	List	*opfamily_lists = NIL;
+	UniqueKey	*key;
+
+	if (topop->all)
+	{
+		/*
+		 * There is no easy way to combine unique keys of the individual
+		 * queries.
+		 */
+		return NIL;
+	}
+
+	/* Iterate like in query_is_distinct_for() */
+	lg = list_head(topop->groupClauses);
+	i = 0;
+	foreach(l, targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		SortGroupClause *sgc;
+		List	*sgc_opfamilies;
+
+		if (tle->resjunk)
+		{
+			i++;
+			continue;	/* ignore resjunk columns */
+		}
+
+		key_idxs = bms_add_member(key_idxs, i++);
+
+		/* non-resjunk columns should have grouping clauses */
+		Assert(lg != NULL);
+		sgc = (SortGroupClause *) lfirst(lg);
+		lg = lnext(topop->groupClauses, lg);
+		sgc_opfamilies = get_mergejoin_opfamilies(sgc->eqop);
+		if (sgc_opfamilies == NIL)
+		{
+			/* XXX Shouldn' this be ERROR? */
+			bms_free(key_idxs);
+			key_idxs = NULL;
+			break;
+		}
+		opfamily_lists = lappend(opfamily_lists, sgc_opfamilies);
+	}
+
+	if (key_idxs == NULL)
+		return NIL;
+
+	key = make_uniquekey(key_idxs, opfamily_lists, true);
+
+	return list_make1(key);
+}
+
+/*
+ * Return a zero-based index of an expression in a list, or -1 if not found.
+ */
+static int
+find_expr_pos_in_list(Expr *expr, List *list)
+{
+	ListCell	*lc;
+	int		idx = 0;
+
+	foreach(lc, list)
+	{
+		Expr	*lexpr = (Expr *) lfirst(lc);
+
+		if (equal(expr, lexpr))
+			return idx;
+
+		idx++;
+	}
+
+	return -1;
+}
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 506fccd20c..3029d1a5b0 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -839,6 +839,13 @@ reduce_unique_semijoins(PlannerInfo *root)
 static bool
 rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
 {
+	/*
+	 * If uniquekeys could already be setup, the relation definitely supports
+	 * distinctness.
+	 */
+	if (rel->uniquekeys)
+		return true;
+
 	/* We only know about baserels ... */
 	if (rel->reloptkind != RELOPT_BASEREL)
 		return false;
@@ -899,6 +906,45 @@ static bool
 rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
 					List **extra_clauses)
 {
+	/*
+	 * Use unique keys if already available, but do not skip filling of
+	 * extra_clauses if caller is interested in it.
+	 */
+	if (rel->uniquekeys && extra_clauses == NULL)
+	{
+		Relids		ecs = NULL;
+		ListCell	*l;
+
+		/* First, express the clauses in the form of EC indexes. */
+		foreach(l, clause_list)
+		{
+			RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+			EquivalenceClass	*ec;
+
+			if (rinfo->outer_is_left)
+				ec = rinfo->right_ec;
+			else
+				ec = rinfo->left_ec;
+
+			/* EC pointers might not have been updated yet. */
+			while (ec->ec_merged)
+				ec = ec->ec_merged;
+
+			ecs = bms_add_member(ecs,
+								 list_member_ptr_pos(root->eq_classes, ec));
+		}
+
+		/* Now check if a clause exists for each unique key expression. */
+		foreach(l, rel->uniquekeys)
+		{
+			UniqueKey  *ukey = lfirst_node(UniqueKey, l);
+
+			if (bms_is_subset(ukey->item_indexes, ecs) &&
+				!uniquekey_contains_multinulls(root, rel, ukey))
+				return true;
+		}
+	}
+
 	/*
 	 * We could skip a couple of tests here if we assume all callers checked
 	 * rel_supports_distinctness first, but it doesn't seem worth taking any
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e40c729e7e..075f98ed16 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1318,6 +1318,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 	RelOptInfo *final_rel;
 	FinalPathExtraData extra;
 	ListCell   *lc;
+	PathTarget *sort_input_target = NULL;
 
 	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 	if (parse->limitCount || parse->limitOffset)
@@ -1363,6 +1364,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 		/* Also extract the PathTarget form of the setop result tlist */
 		final_target = current_rel->cheapest_total_path->pathtarget;
 
+		/*
+		 * If in a subquery, the target might be needed by the parent query,
+		 * in order to convert unique keys.
+		 */
+		root->upper_targets[UPPERREL_FINAL] = final_target;
+
 		/* And check whether it's parallel safe */
 		final_target_parallel_safe =
 			is_parallel_safe(root, (Node *) final_target->exprs);
@@ -1395,7 +1402,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 	else
 	{
 		/* No set operations, do regular planning */
-		PathTarget *sort_input_target;
 		List	   *sort_input_targets;
 		List	   *sort_input_targets_contain_srfs;
 		bool		sort_input_target_parallel_safe;
@@ -1645,10 +1651,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 
 		/*
 		 * Save the various upper-rel PathTargets we just computed into
-		 * root->upper_targets[].  The core code doesn't use this, but it
-		 * provides a convenient place for extensions to get at the info.  For
-		 * consistency, we save all the intermediate targets, even though some
-		 * of the corresponding upperrels might not be needed for this query.
+		 * root->upper_targets[]. In the core we need it to convert unique
+		 * keys of a subquery so the parent query can use them. Besides that
+		 * it provides a convenient place for extensions to get at the info.
+		 * For consistency, we save all the intermediate targets, even though
+		 * some of the corresponding upperrels might not be needed for this
+		 * query.
 		 */
 		root->upper_targets[UPPERREL_FINAL] = final_target;
 		root->upper_targets[UPPERREL_ORDERED] = final_target;
@@ -1720,6 +1728,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 	 */
 	if (parse->sortClause)
 	{
+		RelOptInfo	*input_rel = current_rel;
+		PathTarget	*input_target;
+
 		current_rel = create_ordered_paths(root,
 										   current_rel,
 										   final_target,
@@ -1731,13 +1742,53 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
 			adjust_paths_for_srfs(root, current_rel,
 								  final_targets,
 								  final_targets_contain_srfs);
+
+		if (!parse->setOperations)
+			input_target = sort_input_target;
+		else
+			/*
+			 * Unlike other upper rels, UPPERREL_SETOP should have the target
+			 * initialized properly
+			 */
+			input_target = input_rel->reltarget;
+
+		convert_unique_keys_for_rel(root, current_rel, final_target,
+									input_rel, input_target);
 	}
 
 	/*
 	 * Now we are prepared to build the final-output upperrel.
 	 */
 	final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
-	/* simple_copy_uniquekeys(final_rel, current_rel); */
+
+
+	if (!parse->setOperations)
+	{
+		/*
+		 * Make sure unique keys are valid for final_rel.
+		 */
+		convert_unique_keys_for_rel(root, final_rel, final_target, current_rel,
+									final_target);
+
+	}
+	/*
+	 * TODO Consider if this is worth the effort. For set operations other
+	 * than UNION ALL, the detection of uniqueness is simple enough to be
+	 * handled by query_is_distinct_for(). (That function probably should not
+	 * be removed because it can be useful before the unique keys have been
+	 * initialized.)
+	 */
+	else
+	{
+		SetOperationStmt *topop;
+
+		Assert(IS_UPPER_REL(current_rel));
+
+		/* For set operations, we can create the keys from scratch. */
+		topop = castNode(SetOperationStmt, parse->setOperations);
+		final_rel->uniquekeys = create_uniquekeys_for_sortop(topop,
+															 root->parse->targetList);
+	}
 
 	/*
 	 * If the input rel is marked consider_parallel and there's nothing that's
@@ -3680,6 +3731,14 @@ create_grouping_paths(PlannerInfo *root,
 	grouped_rel = make_grouping_rel(root, input_rel, target,
 									target_parallel_safe, parse->havingQual);
 
+	/* Try to initialize unique keys. */
+	if (!root->parse->groupingSets)
+		convert_unique_keys_for_rel(root, grouped_rel, target, input_rel,
+									input_rel->reltarget);
+	else
+		/* Grouping sets can break uniqueness. */
+		grouped_rel->uniquekeys = NIL;
+
 	/*
 	 * Create either paths for a degenerate grouping or paths for ordinary
 	 * grouping, as appropriate.
@@ -4441,6 +4500,10 @@ create_window_paths(PlannerInfo *root,
 	/* For now, do all work in the (WINDOW, NULL) upperrel */
 	window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
 
+	/* Make sure unique keys are valid for window_rel. */
+	convert_unique_keys_for_rel(root, window_rel, output_target, input_rel,
+								input_target);
+
 	/*
 	 * If the input relation is not parallel-safe, then the window relation
 	 * can't be parallel-safe, either.  Otherwise, we need to examine the
@@ -4664,9 +4727,14 @@ create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
 
 	/*
-	 * populate_uniquekeys_from_pathkeys(root, distinct_rel,
-	 * root->distinct_pathkeys);
+	 * Make sure unique keys are valid for distinct_rel.
+	 *
+	 * Both input and output relation should use the same target in this case,
+	 * however the targets are not necessarily assigned to the relations, so
+	 * pass them explicitly.
 	 */
+	convert_unique_keys_for_rel(root, distinct_rel, target, input_rel,
+								target);
 
 	/*
 	 * We don't compute anything at this level, so distinct_rel will be
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 4e879b0b7c..3b53b82ba1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1487,13 +1487,33 @@ typedef struct UniqueKey
 	pg_node_attr(no_read, no_query_jumble)
 
 	NodeTag		type;
-	Bitmapset  *eclass_indexes; /* Indexes in PlannerInfo's eq_class list of
-								 * ECs that is unique for a RelOptInfo. */
+
+	/*
+	 * A subset of columns that is unique across the output rows of the
+	 * underlying relation.
+	 *
+	 * For base relations and joins, the indexes point to equivalence classes
+	 * the columns belong to. (Essentially it's a subset of
+	 * RelOptInfo.eclass_indexes)
+	 *
+	 * For upper relations, the indexes point to the items of the relation
+	 * target.
+	 */
+	Bitmapset  *item_indexes;
+
+	/*
+	 * Here we store the operator families for each member of
+	 * 'item_indexes'. (i-th item of the list corresponds to i-th member of
+	 * 'item_indexes'). This is needed to create ECs in the parent query if
+	 * the upper relation represents a subquery.
+	 */
+	List	*opfamily_lists;
+
 	int			relid;
 	bool		use_for_distinct;	/* true if it is used in distinct-pathkey,
 									 * in this case we would never check if we
 									 * should discard it during join search. */
-}			UniqueKey;
+} UniqueKey;
 
 
 /*
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index b3fadd2f05..4b99ce6557 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -82,6 +82,8 @@ extern bool match_index_to_operand(Node *operand, int indexcol,
 								   IndexOptInfo *index);
 extern void check_index_predicates(PlannerInfo *root, RelOptInfo *rel);
 
+extern bool IsBooleanOpfamily(Oid opfamily);
+
 /*
  * tidpath.c
  *	  routines to generate tid paths
@@ -138,6 +140,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
 												  Oid collation,
 												  Index sortref,
 												  Relids rel,
+												  JoinDomain *jdomain,
 												  bool create_it);
 extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
 													   Expr *expr,
@@ -277,4 +280,11 @@ extern void populate_baserel_uniquekeys(PlannerInfo *root,
 extern void populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
 										RelOptInfo *outerrel, RelOptInfo *innerrel,
 										List *restrictlist, JoinType jointype);
+extern void convert_unique_keys_for_rel(PlannerInfo *root, RelOptInfo *rel,
+										PathTarget *target, RelOptInfo *input_rel,
+										PathTarget *input_target);
+extern List *create_uniquekeys_for_sortop(SetOperationStmt *topop,
+										  List *targetlist);
+extern bool uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel,
+										  UniqueKey * ukey);
 #endif							/* PATHS_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 675b5b1484..571198a86a 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5648,38 +5648,52 @@ select d.* from d left join (select distinct * from b) s
  Seq Scan on d
 (1 row)
 
--- join removal is not possible when the GROUP BY contains a column that is
--- not in the join condition.  (Note: as of 9.6, we notice that b.id is a
--- primary key and so drop b.c_id from the GROUP BY of the resulting plan;
--- but this happens too late for join removal in the outer plan level.)
-explain (costs off)
+-- when the GROUP BY contains a column that is not in the join condition, join
+-- removal takes place too, due to "unique keys" (Note: as of 9.6, we notice
+-- that b.id is a primary key and so drop b.c_id from the GROUP BY of the
+-- resulting plan; but this happens too late for join removal in the outer
+-- plan level.)
+explain (costs off, verbose on)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
-                QUERY PLAN                
-------------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Group
-         Group Key: b.id
-         ->  Index Scan using b_pkey on b
-   ->  Sort
-         Sort Key: d.a
-         ->  Seq Scan on d
-(8 rows)
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Left Join
+   Output: d.a, d.b
+   Inner Unique: true
+   Hash Cond: (d.a = s.id)
+   ->  Seq Scan on pg_temp.d
+         Output: d.a, d.b
+   ->  Hash
+         Output: s.id
+         ->  Subquery Scan on s
+               Output: s.id
+               ->  HashAggregate
+                     Output: b.id, b.c_id
+                     Group Key: b.id
+                     ->  Seq Scan on pg_temp.b
+                           Output: b.id, b.c_id
+(15 rows)
 
 -- similarly, but keying off a DISTINCT clause
-explain (costs off)
+explain (costs off, verbose on)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-             QUERY PLAN             
-------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Index Scan using b_pkey on b
-   ->  Sort
-         Sort Key: d.a
-         ->  Seq Scan on d
-(6 rows)
+                QUERY PLAN                
+------------------------------------------
+ Hash Left Join
+   Output: d.a, d.b
+   Inner Unique: true
+   Hash Cond: (d.a = s.id)
+   ->  Seq Scan on pg_temp.d
+         Output: d.a, d.b
+   ->  Hash
+         Output: s.id
+         ->  Subquery Scan on s
+               Output: s.id
+               ->  Seq Scan on pg_temp.b
+                     Output: b.id, b.c_id
+(12 rows)
 
 -- join removal is not possible here
 explain (costs off)
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 9eecdc1e92..b9147e8f1a 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2105,3 +2105,27 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
                                                Filter: (odd = b.odd)
 (16 rows)
 
+-- Check that uniqueness of the view output is recognized.
+begin;
+create table tabx as select * from generate_series(1,100) idx;
+create table taby as select * from generate_series(1,100) idy;
+create unique index on taby using btree (idy);
+create view viewy_barrier with (security_barrier=true) as select * from taby;
+analyze tabx, taby;
+explain (costs off, verbose on)
+select * from tabx x left join viewy_barrier y on idy = idx;
+             QUERY PLAN              
+-------------------------------------
+ Hash Left Join
+   Output: x.idx, taby.idy
+   Inner Unique: true
+   Hash Cond: (x.idx = taby.idy)
+   ->  Seq Scan on public.tabx x
+         Output: x.idx
+   ->  Hash
+         Output: taby.idy
+         ->  Seq Scan on public.taby
+               Output: taby.idy
+(10 rows)
+
+rollback;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c4c6c7b8ba..bf8fb27072 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2047,16 +2047,17 @@ explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id and d.b = s.c_id;
 
--- join removal is not possible when the GROUP BY contains a column that is
--- not in the join condition.  (Note: as of 9.6, we notice that b.id is a
--- primary key and so drop b.c_id from the GROUP BY of the resulting plan;
--- but this happens too late for join removal in the outer plan level.)
-explain (costs off)
+-- when the GROUP BY contains a column that is not in the join condition, join
+-- removal takes place too, due to "unique keys" (Note: as of 9.6, we notice
+-- that b.id is a primary key and so drop b.c_id from the GROUP BY of the
+-- resulting plan; but this happens too late for join removal in the outer
+-- plan level.)
+explain (costs off, verbose on)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
 
 -- similarly, but keying off a DISTINCT clause
-explain (costs off)
+explain (costs off, verbose on)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
 
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 75a9b718b2..7b82242221 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1020,3 +1020,16 @@ WHERE a.thousand < 750;
 explain (costs off)
 SELECT * FROM tenk1 A LEFT JOIN tenk2 B
 ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
+
+
+-- Check that uniqueness of the view output is recognized.
+begin;
+create table tabx as select * from generate_series(1,100) idx;
+create table taby as select * from generate_series(1,100) idy;
+create unique index on taby using btree (idy);
+create view viewy_barrier with (security_barrier=true) as select * from taby;
+analyze tabx, taby;
+explain (costs off, verbose on)
+select * from tabx x left join viewy_barrier y on idy = idx;
+
+rollback;
-- 
2.44.0

