From 987c1d08bedc0c9c7436ad2daddbab2202b52425 Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi.fzh@alibaba-inc.com>
Date: Thu, 9 Nov 2023 17:25:35 +0800
Subject: [PATCH v3 1/1] maintain uniquekey on baserel/joinrel level and use it
 for marking

distinct-as-noop.
---
 src/backend/nodes/list.c                    |  17 +
 src/backend/optimizer/path/Makefile         |   3 +-
 src/backend/optimizer/path/README.uniquekey | 220 +++++++
 src/backend/optimizer/path/allpaths.c       |   2 +
 src/backend/optimizer/path/equivclass.c     |  48 ++
 src/backend/optimizer/path/joinrels.c       |   2 +
 src/backend/optimizer/path/uniquekey.c      | 654 ++++++++++++++++++++
 src/backend/optimizer/plan/initsplan.c      |   8 +
 src/backend/optimizer/plan/planner.c        |  33 +
 src/backend/optimizer/util/plancat.c        |  10 +
 src/backend/optimizer/util/relnode.c        |   2 +
 src/include/nodes/pathnodes.h               |  19 +
 src/include/nodes/pg_list.h                 |   2 +
 src/include/optimizer/paths.h               |  13 +
 src/test/regress/expected/join.out          |  11 +-
 src/test/regress/expected/uniquekey.out     | 268 ++++++++
 src/test/regress/parallel_schedule          |   2 +-
 src/test/regress/sql/uniquekey.sql          | 104 ++++
 18 files changed, 1409 insertions(+), 9 deletions(-)
 create mode 100644 src/backend/optimizer/path/README.uniquekey
 create mode 100644 src/backend/optimizer/path/uniquekey.c
 create mode 100644 src/test/regress/expected/uniquekey.out
 create mode 100644 src/test/regress/sql/uniquekey.sql

diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index 750ee5a7e5..eaeaa19ad2 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -694,6 +694,23 @@ list_member_ptr(const List *list, const void *datum)
 	return false;
 }
 
+/*
+ * Return index of the datum in list if found. otherwise return -1.
+ */
+int
+list_member_ptr_pos(const List *list, const void *datum)
+{
+	ListCell   *lc;
+
+	foreach(lc, list)
+	{
+		if (lfirst(lc) == datum)
+			return foreach_current_index(lc);
+	}
+
+	return -1;
+}
+
 /*
  * Return true iff the integer 'datum' is a member of the list.
  */
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f..63cc1505d9 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	joinpath.o \
 	joinrels.o \
 	pathkeys.o \
-	tidpath.o
+	tidpath.o \
+	uniquekey.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/README.uniquekey b/src/backend/optimizer/path/README.uniquekey
new file mode 100644
index 0000000000..be13b113b9
--- /dev/null
+++ b/src/backend/optimizer/path/README.uniquekey
@@ -0,0 +1,220 @@
+Here is a design document and a part of implementation.
+
+What is UniqueKey?
+-----------------
+
+UniqueKey represents a uniqueness information for a RelOptInfo. for
+example:
+
+SELECT id FROM t;
+
+where the ID is the UniqueKey for the RelOptInfo (t).  In the real world,
+it has the following attributes:
+
+1). It should be EquivalenceClass based. for example:
+
+SELECT a FROM t WHERE a = id;
+
+In this case, the UniqueKey should be 1 EC with two members
+- EC(EM(a), EM(id)).
+
+
+2). Each UniqueKey may be made up with 1+ EquivalenceClass. for example:
+
+CREATE TABLE t(a int not null,  b int not null);
+CREATE UNIQUE INDEX on t(a, b);
+SELECT * FROM t;
+
+Where the UniqueKey for RelOptInfo (t) will be 2 ECs with each one has 1
+member.
+
+- EC(em=a), EC(em=b)
+
+3). Each RelOptInfo may have 1+ UniqueKeys.
+
+CREATE TABLE t(a int not null,  b int not null, c int not null);
+CREATE UNIQUE INDEX on t(a, b);
+CREATE UNIQUE INDEX on t(c);
+
+SELECT * FROM t;
+
+Where the UniqueKey for RelOptInfo (t) will be
+- [EC(em=a), EC(em=b)].
+- [EC(em=c)]
+
+4). A special case is about the one-row case. It works like:
+SELECT * FROM t WHERE id = 1;
+Here every single expression in the RelOptInfo (t) is unique since only
+one row here.
+
+Where can we use it?
+--------------------
+1. mark the distinct as no-op.  SELECT DISTINCT uniquekey FROM v; This
+  optimization has been required several times in our threads.
+
+2. Figure out more pathkey within the onerow case, then some planning
+  time can be reduced to be big extend. This user case is not absurd,
+  a real user case like this:
+
+  CREATE TABLE small_t (id int primary key, b int, c int .. u int);
+  CREATE INDEX ON small_t(b);
+  CREATE INDEX ON small_t(c);
+  ..
+
+  SELECT * FROM small_t s
+    JOIN t1 on t1.sb = s.b
+    JOIN T2 on t2.sc = s.c
+    ..
+    JOIN t20 on t20.su = s.u
+  WHERE s.id = 1;
+
+  Without the above optimization, we don't know s.b /s.c is ordered
+  already, so it might keep more different paths for small_t because of
+  they have different interesting pathkey, and use more planning time
+  for sorting to support merge join.
+
+  With the above optimization, the planning time should be reduced since
+  the seq scan can produce a ordered result for every expression.
+
+3. Figure out more interesting pathkey after join with normal UniqueKey.
+
+  CREATE TABLE t(id int primary key, b int, c int);
+  CREATE INDEX on t(c);
+  ANALYZE t;
+
+  explain (costs off)
+  select t1.id, t2.c from t t1
+    join t1 t2 on t1.id = t2.b
+    and t2.c > 3
+  order by t1.id, t2.c;
+
+		    QUERY PLAN
+  --------------------------------------------------
+  Sort Key: t1.id, t2.c   <---  this sort can be avoided actually.
+  ->  Nested Loop
+	Join Filter: (t1.id = t2.b)
+	->  Index Only Scan using t_pkey on t t1
+	->  Index Scan using t1_c_idx on t1 t2
+	      Index Cond: (c > 3)
+
+  *Without knowing the t1.id is unique*,  which means there are some
+  duplicated data in t1.id, the duplication data in t1 will break the
+  order of (t1.id, t2.c), but if we know the t1.id is unique, the sort
+  will be not needed.  I'm pretty happy with this finding.
+
+4. Optimize some group by case, like
+
+  SELECT id, sum(b) FROM t GROUP BY id
+  is same with
+  SELECT id, b from t;
+
+  I'm not sure how often it is in the real life, I'm not so excited with
+  this for now.
+
+
+How to present ECs in UniqueKey?
+--------------------------------
+
+I choose "Bitmapset *eclass_indexes;" finally, which is because
+Bitmapset is memory compact and good at bms_union,  bms_is_subset
+stuffs. The value in the bitmap is the positions in root->eq_classes. It
+is also be able to present the UniqueKey which is made up from multi
+relations or upper relation.
+
+How to present single row in UniqueKey
+-------------------------------------
+
+I just use a 'Index relid', an non-zero value means the
+RelOptInfo[relid] is single row.  For the case like
+
+SELECT * FROM t WHERE id = 1;
+The UniqueKey is:
+- UniqueKey(eclass_indexes=NULL, relid=1)
+
+during a join, any unique keys join with single row, it's uniqueness can
+be kept.
+
+SELECT t1.uk, t2.a FROM t WHERE t2.id = 1 and any-qual(t1, t2);
+- UniqueKey (t1.uk)
+
+more specially, join two single row like:
+
+SELECT * FROM t1 join t2 on true where t1.id = 1 and t2.id = 2;
+
+the UniqueKey for the JoinRel will be:
+- UniqueKey(eclass_indexes=NULL, relid=1)
+- UniqueKey(eclass_indexes=NULL, relid=2)
+
+However, the current single row presentation can't works well with Upper
+relation, which I think it would be acceptable. See the following case:
+
+SELECT count(*) FROM t1 JOIN t2 on true;
+
+
+How to maintain the uniquekey?
+-------------------------------
+the uniquekey is maintained from baserel to join rel then to upper
+relation. In the base rel, it comes from unique index. From the join
+relation, it is maintained with two rules:
+
+- the uniquekey in one side is still unique if it can't be duplicated
+  after the join. for example:
+
+  SELECT t1.pk FROM t1 JOIN t2 ON t1.a = t2.pk;
+  UniqueKey on t1:  t1.pk
+  UniqueKey on t1 Join t2:  t1.pk
+
+- The combined unique key from both sides are unique all the times.
+  SELECT t1.pk , t2.pk FROM t1 join t2 on true;
+  UniqueKey on t1 join t2:  (t1.pk, t2.pk)
+
+Some other operations like DISTINCT, GROUP BY can produce UniqueKey as well.
+
+NULL values
+-----------
+notnullattrs are added into RelOptInfo, which present if these attributes
+may not be NULL after the baserestrictinfo is executed. not-null-attributes
+may be generated by not-null constraint in catalog or baserestrictinfo
+(only) filter. However it is possible become NULLs because of outer
+join, then Var.varnullingrels is used in this case. see
+'var_is_nullable' function call.
+
+To simplify the UniqueKey module, it doesn't care about the null values
+during the maintaining, which means it may contains multi NULL values
+all the time by design. However whenever a user case care about that,
+the user case can get the answer with the above logic, that is what
+'mark-distinct-as-noop' does.
+
+How to reduce the overhead
+----------------------------------
+UniqueKey employs the similar strategy like PathKey, it only maintain
+the interesting PathKey. Currently the interesting UniqueKey includes:
+1). It is subset of distinct_pathkeys.
+2). It is used in mergeable join clauses for unique key deduction (for
+the join rel case, rule 1). In this case, it can be discarded quickly if
+the join has been done.
+
+To avoid to check if an uniquekey is subset of distinct clause again and
+again, I cached the result into UnqiueKey struct during the UniqueKey
+creation.
+
+Since our first goal is just used for marking distinct as no-op, so if
+there is no distinct clause at all, unique key will be not maintained at
+the beginning. so we can have some codes like:
+
+if (root->distinct_pathkeys == NULL)
+return;
+
+This fast path is NOT added for now for better code coverage.
+
+What I have now:
+----------------
+
+The current patch just maintain the UniqueKey at the baserel/joinrel level
+and used it for mark-distinct-as-noop purpose. including the basic idea of
+
+- How the UniqueKey is defined.
+- How to find out the interesting pathkey in the baserel/joinrel level.
+- How to figure out the unique key contains NULL values.
+
+Also the test cases are prepared, see uniquekey.sql.
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 67921a0826..a1677924af 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -458,6 +458,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		}
 	}
 
+	populate_baserel_uniquekeys(root, rel);
+
 	/*
 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
 	 */
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 7fa502d6e2..29edc8d1b5 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -744,6 +744,54 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 	return newec;
 }
 
+/*
+ * find_ec_position_matching_expr
+ *		Locate the position of EquivalenceClass whose members matching
+ *		the given expr, if any; return -1 if no match.
+ */
+int
+find_ec_position_matching_expr(PlannerInfo *root,
+							   Expr *expr,
+							   RelOptInfo *baserel)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(baserel->eclass_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+
+		if (find_ec_member_matching_expr(ec, expr, baserel->relids))
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * build_ec_positions_for_exprs
+ *
+ *		Given a list of exprs, find the related EC positions for each of
+ *		them. if any exprs has no EC related, return NULL;
+ */
+Bitmapset *
+build_ec_positions_for_exprs(PlannerInfo *root, List *exprs, RelOptInfo *rel)
+{
+	ListCell   *lc;
+	Bitmapset  *res = NULL;
+
+	foreach(lc, exprs)
+	{
+		int			pos = find_ec_position_matching_expr(root, lfirst(lc), rel);
+
+		if (pos < 0)
+		{
+			bms_free(res);
+			return NULL;
+		}
+		res = bms_add_member(res, pos);
+	}
+	return res;
+}
+
 /*
  * find_ec_member_matching_expr
  *		Locate an EquivalenceClass member matching the given expr, if any;
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index d03ace50a1..ee6ec4b68f 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -743,6 +743,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 							 sjinfo, pushed_down_joins,
 							 &restrictlist);
 
+	populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype);
+
 	/*
 	 * If we've already proven this join is empty, we needn't consider any
 	 * more paths for it.
diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
new file mode 100644
index 0000000000..ecc7c6d802
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -0,0 +1,654 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekey.c
+ *	  Utilities for maintaining uniquekey.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekey.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/paths.h"
+
+
+/* Functions to populate UniqueKey */
+static bool add_uniquekey_for_uniqueindex(PlannerInfo *root,
+										  IndexOptInfo *unique_index,
+										  List *truncatable_exprs,
+										  List *expr_opfamilies);
+static bool unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes);
+
+/* Helper functions to create UniqueKey. */
+static UniqueKey * make_uniquekey(Bitmapset *eclass_indexes,
+								  bool useful_for_distinct);
+static void mark_rel_singlerow(RelOptInfo *rel, int relid);
+
+static UniqueKey * rel_singlerow_uniquekey(RelOptInfo *rel);
+static bool uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey);
+
+/* Debug only */
+static void print_uniquekey(PlannerInfo *root, RelOptInfo *rel);
+
+static bool uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relids relids);
+static bool is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *joinrel);
+
+/*
+ * populate_baserel_uniquekeys
+ *
+ *		UniqueKey on baserel comes from unique indexes. Any expression
+ * which equals with Const can be stripped and the left expressions are
+ * still unique.
+ */
+void
+populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel)
+{
+	ListCell   *lc;
+	List	   *truncatable_exprs = NIL,
+			   *expr_opfamilies = NIL;
+
+	/*
+	 * Currently we only use UniqueKey for mark-distinct-as-noop case, so if
+	 * there is no-distinct-clause at all, we can ignore the maintenance at
+	 * the first place. however for code coverage at the development stage, we
+	 * bypass this fastpath on purpose.
+	 *
+	 * XXX: even we want this fastpath, we still need to distinguish even the
+	 * current subquery has no DISTINCT, but the upper query may have.
+	 */
+
+	/*
+	 * if (root->distinct_pathkeys == NIL) return;
+	 */
+	foreach(lc, rel->baserestrictinfo)
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+		if (rinfo->mergeopfamilies == NIL)
+			continue;
+
+		if (!IsA(rinfo->clause, OpExpr))
+			continue;
+
+		if (bms_is_empty(rinfo->left_relids))
+			truncatable_exprs = lappend(truncatable_exprs, get_rightop(rinfo->clause));
+		else if (bms_is_empty(rinfo->right_relids))
+			truncatable_exprs = lappend(truncatable_exprs, get_leftop(rinfo->clause));
+		else
+			continue;
+
+		expr_opfamilies = lappend(expr_opfamilies, rinfo->mergeopfamilies);
+	}
+
+	foreach(lc, rel->indexlist)
+	{
+		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+
+		if (!index->unique || !index->immediate ||
+			(index->indpred != NIL && !index->predOK))
+			continue;
+
+		if (add_uniquekey_for_uniqueindex(root, index,
+										  truncatable_exprs,
+										  expr_opfamilies))
+			/* Find a singlerow case, no need to go through other indexes. */
+			return;
+	}
+
+	print_uniquekey(root, rel);
+}
+
+
+/*
+ * add_uniquekey_for_uniqueindex
+ *
+ *		 populate a UniqueKey if it is interesting, return true iff the
+ * UniqueKey is an SingleRow. Only the interesting UniqueKeys are kept.
+ */
+static bool
+add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
+							  List *truncatable_exprs, List *expr_opfamilies)
+{
+	List	   *unique_exprs = NIL;
+	Bitmapset  *unique_ecs = NULL;
+	ListCell   *indexpr_item;
+	RelOptInfo *rel = unique_index->rel;
+	bool		used_for_distinct;
+	int			c;
+
+	indexpr_item = list_head(unique_index->indexprs);
+
+	for (c = 0; c < unique_index->nkeycolumns; c++)
+	{
+		int			attr = unique_index->indexkeys[c];
+		Expr	   *expr;		/* The candidate for UniqueKey expression. */
+		bool		matched_const = false;
+		ListCell   *lc1,
+				   *lc2;
+
+		if (attr > 0)
+		{
+			expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr;
+		}
+		else if (attr == 0)
+		{
+			/* Expression index */
+			expr = lfirst(indexpr_item);
+			indexpr_item = lnext(unique_index->indexprs, indexpr_item);
+		}
+		else					/* attr < 0 */
+		{
+			/* Index on OID is possible, not handle it for now. */
+			return false;
+		}
+
+		/* Ignore the expr which are equals to const. */
+		forboth(lc1, truncatable_exprs, lc2, expr_opfamilies)
+		{
+			if (list_member_oid((List *) lfirst(lc2), unique_index->opfamily[c]) &&
+				match_index_to_operand((Node *) lfirst(lc1), c, unique_index))
+			{
+				matched_const = true;
+				break;
+			}
+		}
+
+		if (matched_const)
+			continue;
+
+		unique_exprs = lappend(unique_exprs, expr);
+	}
+
+	if (unique_exprs == NIL)
+	{
+		/* single row is always interesting. */
+		mark_rel_singlerow(rel, rel->relid);
+		return true;
+	}
+
+	/*
+	 * if no EquivalenceClass is found for any exprs in unique exprs, we are
+	 * sure the whole exprs are not in the DISTINCT clause or mergeable join
+	 * clauses. so it is not interesting.
+	 */
+	unique_ecs = build_ec_positions_for_exprs(root, unique_exprs, rel);
+	if (unique_ecs == NULL)
+		return false;
+
+	used_for_distinct = unique_ecs_useful_for_distinct(root, unique_ecs);
+
+
+	rel->uniquekeys = lappend(rel->uniquekeys,
+							  make_uniquekey(unique_ecs,
+											 used_for_distinct));
+	return false;
+}
+
+/*
+ *	make_uniquekey
+ *		Based on UnqiueKey rules, it is impossible for a UnqiueKey
+ * which have eclass_indexes and relid both set. This function just
+ * handle eclass_indexes case.
+ */
+static UniqueKey *
+make_uniquekey(Bitmapset *eclass_indexes, bool useful_for_distinct)
+{
+	UniqueKey  *ukey = makeNode(UniqueKey);
+
+	ukey->eclass_indexes = eclass_indexes;
+	ukey->relid = 0;
+	ukey->use_for_distinct = useful_for_distinct;
+	return ukey;
+}
+
+/*
+ * mark_rel_singlerow
+ *	mark a relation as singlerow.
+ */
+static void
+mark_rel_singlerow(RelOptInfo *rel, int relid)
+{
+	UniqueKey  *ukey = makeNode(UniqueKey);
+
+	ukey->relid = relid;
+	rel->uniquekeys = list_make1(ukey);
+}
+
+static inline bool
+uniquekey_is_singlerow(UniqueKey * ukey)
+{
+	return ukey->relid != 0;
+}
+
+/*
+ *
+ *	Return the UniqueKey if rel is a singlerow Relation. othwise
+ * return NULL.
+ */
+static UniqueKey *
+rel_singlerow_uniquekey(RelOptInfo *rel)
+{
+	if (rel->uniquekeys != NIL)
+	{
+		UniqueKey  *ukey = linitial_node(UniqueKey, rel->uniquekeys);
+
+		if (ukey->relid)
+			return ukey;
+	}
+	return NULL;
+}
+
+/*
+ * print_uniquekey
+ *	Used for easier reivew, should be removed before commit.
+ */
+static void
+print_uniquekey(PlannerInfo *root, RelOptInfo *rel)
+{
+	if (!enable_geqo)
+	{
+		ListCell   *lc;
+
+		elog(INFO, "Rel = %s", bmsToString(rel->relids));
+		foreach(lc, rel->uniquekeys)
+		{
+			UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+			elog(INFO, "UNIQUEKEY{indexes=%s, singlerow_rels=%d, use_for_distinct=%d}",
+				 bmsToString(ukey->eclass_indexes),
+				 ukey->relid,
+				 ukey->use_for_distinct);
+		}
+	}
+}
+
+/*
+ *	is it possible that the var contains multi NULL values in the given
+ * RelOptInfo rel?
+ */
+static bool
+var_is_nullable(PlannerInfo *root, Var *var, RelOptInfo *rel)
+{
+	RelOptInfo *base_rel;
+
+	/* check if the outer join can add the NULL values.  */
+	if (bms_overlap(var->varnullingrels, rel->relids))
+		return true;
+
+	/* check if the user data has the NULL values. */
+	base_rel = root->simple_rel_array[var->varno];
+	return !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, base_rel->notnullattrs);
+}
+
+
+/*
+ * uniquekey_contains_multinulls
+ *
+ *	Check if the uniquekey contains nulls values.
+ */
+static bool
+uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth_node(EquivalenceClass, root->eq_classes, i);
+		ListCell   *lc;
+
+		foreach(lc, ec->ec_members)
+		{
+			EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
+			Var		   *var;
+
+			var = (Var *) em->em_expr;
+
+			if (!IsA(var, Var))
+				continue;
+
+			if (var_is_nullable(root, var, rel))
+				return true;
+			else
+
+				/*
+				 * If any one of member in the EC is not nullable, we all the
+				 * members are not nullable since they are equal with each
+				 * other.
+				 */
+				break;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * relation_is_distinct_for
+ *
+ * Check if the rel is distinct for distinct_pathkey.
+ */
+bool
+relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_pathkey)
+{
+	ListCell   *lc;
+	UniqueKey  *singlerow_ukey = rel_singlerow_uniquekey(rel);
+	Bitmapset  *pathkey_bm = NULL;
+
+	if (singlerow_ukey)
+	{
+		return !uniquekey_contains_multinulls(root, rel, singlerow_ukey);
+	}
+
+	foreach(lc, distinct_pathkey)
+	{
+		PathKey    *pathkey = lfirst_node(PathKey, lc);
+		int			pos = list_member_ptr_pos(root->eq_classes, pathkey->pk_eclass);
+
+		if (pos == -1)
+			return false;
+
+		pathkey_bm = bms_add_member(pathkey_bm, pos);
+	}
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		if (bms_is_subset(ukey->eclass_indexes, pathkey_bm) &&
+			!uniquekey_contains_multinulls(root, rel, ukey))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * unique_ecs_useful_for_distinct
+ *
+ *	Return true if all the EquivalenceClass for ecs exists in root->distinct_pathkey.
+ */
+static bool
+unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes)
+{
+	int			i = -1;
+
+	while ((i = bms_next_member(ec_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+		ListCell   *p;
+		bool		found = false;
+
+		foreach(p, root->distinct_pathkeys)
+		{
+			PathKey    *pathkey = lfirst_node(PathKey, p);
+
+			if (ec == pathkey->pk_eclass)
+			{
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * populate_joinrel_uniquekey_for_rel
+ *
+ *    Check if the pattern of rel.any_column = other_rel.unique_key_column
+ * exists, if so, the uniquekey in rel is still valid after join and it is
+ * added into joinrel and return true. otherwise return false.
+ */
+static bool
+populate_joinrel_uniquekey_for_rel(PlannerInfo *root, RelOptInfo *joinrel,
+								   RelOptInfo *rel, RelOptInfo *other_rel,
+								   List *restrictlist)
+{
+	bool		rel_keep_unique = false;
+	Bitmapset  *other_ecs = NULL;
+	Relids		other_relids = NULL;
+	ListCell   *lc;
+
+	if (rel_singlerow_uniquekey(other_rel))
+	{
+		/*
+		 * any uniquekeys stuff join with single-row, its uniqueness is still
+		 * kept.
+		 */
+		goto done;
+	}
+
+	/* find out the ECs which the rel.any_columns equals to. */
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *r = lfirst_node(RestrictInfo, lc);
+
+		if (r->mergeopfamilies == NIL)
+			continue;
+
+		/* Build the Bitmapset for easy comparing. */
+		if (bms_equal(r->left_relids, rel->relids) && r->right_ec != NULL)
+		{
+			other_ecs = bms_add_member(other_ecs, list_member_ptr_pos(root->eq_classes, r->right_ec));
+			other_relids = bms_add_members(other_relids, r->right_relids);
+		}
+		else if (bms_equal(r->right_relids, rel->relids) && r->left_ec != NULL)
+		{
+			other_ecs = bms_add_member(other_ecs, list_member_ptr_pos(root->eq_classes, r->left_ec));
+			other_relids = bms_add_members(other_relids, r->left_relids);
+		}
+	}
+
+	/* Check if these ECs include a uniquekey of other_rel */
+	foreach(lc, other_rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		if (uniquekey_contains_in(root, ukey, other_ecs, other_relids))
+		{
+			rel_keep_unique = true;
+			break;
+		}
+	}
+
+	if (!rel_keep_unique)
+		return false;
+
+done:
+
+	/*
+	 * Now copy the uniquekey in rel to joinrel, but first we need to know if
+	 * it is useful.
+	 */
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		if (is_uniquekey_useful_afterjoin(root, ukey, joinrel))
+		{
+			if (uniquekey_is_singlerow(ukey))
+			{
+				/*
+				 * XXX (?): a). NULL values. b). other relids rather than
+				 * ukey->relid.
+				 */
+				mark_rel_singlerow(joinrel, ukey->relid);
+				break;
+			}
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, ukey);
+		}
+	}
+
+	return true;
+}
+
+/*
+ * populate_joinrel_uniquekeys
+ */
+void
+populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
+							RelOptInfo *outerrel, RelOptInfo *innerrel,
+							List *restrictlist, JoinType jointype)
+{
+	bool		outeruk_still_valid = false,
+				inneruk_still_valid = false;
+	ListCell   *lc,
+			   *lc2;
+
+	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	{
+		foreach(lc, outerrel->uniquekeys)
+		{
+			/*
+			 * the uniquekey on the outer side is not changed after semi/anti
+			 * join.
+			 */
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, lfirst(lc));
+		}
+		return;
+	}
+
+	if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL)
+		return;
+
+	outeruk_still_valid = populate_joinrel_uniquekey_for_rel(root, joinrel, outerrel,
+															 innerrel, restrictlist);
+	inneruk_still_valid = populate_joinrel_uniquekey_for_rel(root, joinrel, innerrel,
+															 outerrel, restrictlist);
+
+	if (outeruk_still_valid || inneruk_still_valid)
+
+		/*
+		 * the uniquekey on outers or inners have been added into joinrel so
+		 * the combined uniuqekey from both sides is not needed.
+		 */
+		return;
+
+	/*
+	 * The combined UniqueKey is still unique no matter the join method or
+	 * join clauses. So let build the combined ones.
+	 */
+	foreach(lc, outerrel->uniquekeys)
+	{
+		UniqueKey  *outer_ukey = lfirst(lc);
+
+		if (!is_uniquekey_useful_afterjoin(root, outer_ukey, joinrel))
+			/* discard the uniquekey which is not interesting. */
+			continue;
+
+		/* singlerow will make the inneruk_still_valid true */
+		Assert(!uniquekey_is_singlerow(outer_ukey));
+
+		foreach(lc2, innerrel->uniquekeys)
+		{
+			UniqueKey  *inner_ukey = lfirst(lc2);
+
+			if (!is_uniquekey_useful_afterjoin(root, inner_ukey, joinrel))
+				continue;
+
+			/* singlerow will make the outeruk_still_valid true */
+			Assert(!uniquekey_is_singlerow(inner_ukey));
+
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys,
+										  make_uniquekey(
+														 bms_union(outer_ukey->eclass_indexes, inner_ukey->eclass_indexes),
+														 outer_ukey->use_for_distinct || inner_ukey->use_for_distinct));
+		}
+	}
+}
+
+/*
+ * uniquekey_contains_in
+ *	Return if UniqueKey contains in the list of EquivalenceClass
+ * or the UniqueKey's SingleRow contains in relids.
+ */
+static bool
+uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relids relids)
+{
+
+	if (uniquekey_is_singlerow(ukey))
+	{
+		return bms_is_member(ukey->relid, relids);
+	}
+
+	return bms_is_subset(ukey->eclass_indexes, ecs);
+}
+
+
+/*
+ * uniquekey_useful_for_merging
+ *	Check if the uniquekey is useful for mergejoins above the given relation.
+ *
+ * similar with pathkeys_useful_for_merging.
+ */
+static bool
+uniquekey_useful_for_merging(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *rel)
+{
+
+	int			i = -1;
+
+	while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0)
+	{
+		EquivalenceClass *ec = list_nth(root->eq_classes, i);
+		ListCell   *j;
+		bool		matched = false;
+
+		if (rel->has_eclass_joins && eclass_useful_for_merging(root, ec, rel))
+		{
+			matched = true;
+		}
+		else
+		{
+			foreach(j, rel->joininfo)
+			{
+				RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+				if (restrictinfo->mergeopfamilies == NIL)
+					continue;
+				update_mergeclause_eclasses(root, restrictinfo);
+
+				if (ec == restrictinfo->left_ec || ec == restrictinfo->right_ec)
+				{
+					matched = true;
+					break;
+				}
+			}
+		}
+
+		if (!matched)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * is_uniquekey_useful_afterjoin
+ *
+ *  uniquekey is useful when it contains in distinct_pathkey or in mergable join clauses.
+ */
+static bool
+is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *joinrel)
+{
+	if (ukey->use_for_distinct)
+		return true;
+
+	/* XXX might needs a better judgement */
+	if (uniquekey_is_singlerow(ukey))
+		return true;
+
+
+	return uniquekey_useful_for_merging(root, ukey, joinrel);
+}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index b31d892121..1ed02d6eb2 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -2647,6 +2647,14 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
 			/* Add clause to rel's restriction list */
 			rel->baserestrictinfo = lappend(rel->baserestrictinfo,
 											restrictinfo);
+			{
+				List	   *not_null_vars = find_nonnullable_vars((Node *) restrictinfo->clause);
+
+				if (not_null_vars != NIL)
+					rel->notnullattrs = bms_union(rel->notnullattrs,
+												  list_nth(not_null_vars, rel->relid));
+			}
+
 			/* Update security level info */
 			rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
 												 restrictinfo->security_level);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a8cea5efe1..3ac2deaa21 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1663,9 +1663,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												gset_data);
 			/* Fix things up if grouping_target contains SRFs */
 			if (parse->hasTargetSRFs)
+			{
 				adjust_paths_for_srfs(root, current_rel,
 									  grouping_targets,
 									  grouping_targets_contain_srfs);
+				current_rel->uniquekeys = NIL;
+			}
 		}
 
 		/*
@@ -1725,6 +1728,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	 * Now we are prepared to build the final-output upperrel.
 	 */
 	final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
+	/* simple_copy_uniquekeys(final_rel, current_rel); */
 
 	/*
 	 * If the input rel is marked consider_parallel and there's nothing that's
@@ -4043,6 +4047,22 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 									  gd,
 									  extra->targetList);
 
+	if (root->parse->groupingSets)
+	{
+		/* nothing to do */
+	}
+	else if (root->parse->groupClause && root->group_pathkeys != NIL)
+	{
+		/*
+		 * populate_uniquekeys_from_pathkeys(root, grouped_rel,
+		 * root->group_pathkeys);
+		 */
+	}
+	else
+	{
+		/* SingleRow Case */
+	}
+
 	/* Build final grouping paths */
 	add_paths_to_grouping_rel(root, input_rel, grouped_rel,
 							  partially_grouped_rel, agg_costs, gd,
@@ -4662,9 +4682,22 @@ create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel)
 {
 	RelOptInfo *distinct_rel;
 
+	/*
+	 * distinct_pathkeys may be NIL if it distinctClause is not sortable. XXX:
+	 * What should we do for the else?
+	 */
+	if (root->distinct_pathkeys &&
+		relation_is_distinct_for(root, input_rel, root->distinct_pathkeys))
+		return input_rel;
+
 	/* For now, do all work in the (DISTINCT, NULL) upperrel */
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
 
+	/*
+	 * populate_uniquekeys_from_pathkeys(root, distinct_rel,
+	 * root->distinct_pathkeys);
+	 */
+
 	/*
 	 * We don't compute anything at this level, so distinct_rel will be
 	 * parallel-safe if the input rel is parallel-safe.  In particular, if
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7159c775fb..665d3e890c 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -121,6 +121,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	Index		i;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -175,6 +176,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	/* Retrieve the parallel_workers reloption, or -1 if not set. */
 	rel->rel_parallel_workers = RelationGetParallelWorkers(relation, -1);
 
+	for (i = 0; i < relation->rd_att->natts; i++)
+	{
+		FormData_pg_attribute attr = relation->rd_att->attrs[i];
+
+		if (attr.attnotnull)
+			rel->notnullattrs = bms_add_member(rel->notnullattrs,
+											   attr.attnum - FirstLowInvalidHeapAttributeNumber);
+	}
+
 	/*
 	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
 	 * Don't bother with indexes from traditional inheritance parents.  For
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 5d83f60eb9..2c9e1fe016 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -284,6 +284,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->all_partrels = NULL;
 	rel->partexprs = NULL;
 	rel->nullable_partexprs = NULL;
+	rel->uniquekeys = NIL;
 
 	/*
 	 * Pass assorted information down the inheritance hierarchy.
@@ -745,6 +746,7 @@ build_join_rel(PlannerInfo *root,
 	joinrel->all_partrels = NULL;
 	joinrel->partexprs = NULL;
 	joinrel->nullable_partexprs = NULL;
+	joinrel->uniquekeys = NIL;
 
 	/* Compute information relevant to the foreign relations. */
 	set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index ed85dc7414..301f7e89d7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -876,6 +876,7 @@ typedef struct RelOptInfo
 	 * Vars/Exprs, cost, width
 	 */
 	struct PathTarget *reltarget;
+	List	   *uniquekeys;		/* A list of UniqueKey. */
 
 	/*
 	 * materialization information
@@ -913,6 +914,9 @@ typedef struct RelOptInfo
 	Relids	   *attr_needed pg_node_attr(read_write_ignore);
 	/* array indexed [min_attr .. max_attr] */
 	int32	   *attr_widths pg_node_attr(read_write_ignore);
+
+	/* The not null attrs from catalogs or baserestrictinfo. */
+	Bitmapset  *notnullattrs;
 	/* relids of outer joins that can null this baserel */
 	Relids		nulling_relids;
 	/* LATERAL Vars and PHVs referenced by rel */
@@ -1456,6 +1460,21 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+
+typedef struct UniqueKey
+{
+	pg_node_attr(no_read, no_query_jumble)
+
+	NodeTag		type;
+	Bitmapset  *eclass_indexes; /* Indexes in PlannerInfo's eq_class list of
+								 * ECs that is unique for a RelOptInfo. */
+	int			relid;
+	bool		use_for_distinct;	/* true if it is used in distinct-pathkey,
+									 * in this case we would never check if we
+									 * should discard it during join search. */
+}			UniqueKey;
+
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 529a382d28..1be53fe7f7 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -581,6 +581,8 @@ extern bool list_member_int(const List *list, int datum);
 extern bool list_member_oid(const List *list, Oid datum);
 extern bool list_member_xid(const List *list, TransactionId datum);
 
+extern int	list_member_ptr_pos(const List *list, const void *datum);
+
 extern pg_nodiscard List *list_delete(List *list, void *datum);
 extern pg_nodiscard List *list_delete_ptr(List *list, void *datum);
 extern pg_nodiscard List *list_delete_int(List *list, int datum);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9e7408c7ec..14649a6bc4 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -139,11 +139,17 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
 extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
 													   Expr *expr,
 													   Relids relids);
+extern int	find_ec_position_matching_expr(PlannerInfo *root,
+										   Expr *expr,
+										   RelOptInfo *rel);
 extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
 													EquivalenceClass *ec,
 													List *exprs,
 													Relids relids,
 													bool require_parallel_safe);
+extern Bitmapset *build_ec_positions_for_exprs(PlannerInfo *root,
+											   List *exprs,
+											   RelOptInfo *rel);
 extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel,
 										 EquivalenceClass *ec,
 										 bool require_parallel_safe);
@@ -262,4 +268,11 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+									 List *distinct_pathkey);
+extern void populate_baserel_uniquekeys(PlannerInfo *root,
+										RelOptInfo *baserel);
+extern void populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
+										RelOptInfo *outerrel, RelOptInfo *innerrel,
+										List *restrictlist, JoinType jointype);
 #endif							/* PATHS_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 892ea5f170..0fb7faec12 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5645,18 +5645,15 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
+             QUERY PLAN             
+------------------------------------
  Merge Right Join
    Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+   ->  Index Scan using b_pkey on b
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+(6 rows)
 
 -- join removal is not possible here
 explain (costs off)
diff --git a/src/test/regress/expected/uniquekey.out b/src/test/regress/expected/uniquekey.out
new file mode 100644
index 0000000000..aa712c2dd7
--- /dev/null
+++ b/src/test/regress/expected/uniquekey.out
@@ -0,0 +1,268 @@
+CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int);
+explain (costs off)
+select distinct id from uk_t;
+    QUERY PLAN    
+------------------
+ Seq Scan on uk_t
+(1 row)
+
+explain (costs off)
+select distinct e from uk_t where id = e;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uk_t
+   Filter: (id = e)
+(2 rows)
+
+create unique index on uk_t (a, b);
+create unique index on uk_t (c, d);
+explain (costs off)
+select distinct a, b from uk_t;
+    QUERY PLAN    
+------------------
+ Seq Scan on uk_t
+(1 row)
+
+explain (costs off)
+select distinct c, d from uk_t;
+       QUERY PLAN       
+------------------------
+ HashAggregate
+   Group Key: c, d
+   ->  Seq Scan on uk_t
+(3 rows)
+
+explain (costs off)
+select distinct c, d from uk_t
+where c > 0 and d > 0;
+                QUERY PLAN                 
+-------------------------------------------
+ Bitmap Heap Scan on uk_t
+   Recheck Cond: ((c > 0) AND (d > 0))
+   ->  Bitmap Index Scan on uk_t_c_d_idx
+         Index Cond: ((c > 0) AND (d > 0))
+(4 rows)
+
+explain (costs off)
+select distinct d from uk_t
+where c > 1 and d > 0;
+                   QUERY PLAN                    
+-------------------------------------------------
+ HashAggregate
+   Group Key: d
+   ->  Bitmap Heap Scan on uk_t
+         Recheck Cond: ((c > 1) AND (d > 0))
+         ->  Bitmap Index Scan on uk_t_c_d_idx
+               Index Cond: ((c > 1) AND (d > 0))
+(6 rows)
+
+explain (costs off)
+select distinct d from uk_t
+where c = 1 and d > 0;
+                QUERY PLAN                 
+-------------------------------------------
+ Bitmap Heap Scan on uk_t
+   Recheck Cond: ((c = 1) AND (d > 0))
+   ->  Bitmap Index Scan on uk_t_c_d_idx
+         Index Cond: ((c = 1) AND (d > 0))
+(4 rows)
+
+-- test the join case --
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id;
+           QUERY PLAN            
+---------------------------------
+ Hash Join
+   Hash Cond: (t1.e = t2.id)
+   ->  Seq Scan on uk_t t1
+   ->  Hash
+         ->  Seq Scan on uk_t t2
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b;
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Join
+   Hash Cond: ((t1.e = t2.a) AND (t1.d = t2.b))
+   ->  Seq Scan on uk_t t1
+   ->  Hash
+         ->  Seq Scan on uk_t t2
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.id;
+     QUERY PLAN      
+---------------------
+ Seq Scan on uk_t t1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b;
+     QUERY PLAN      
+---------------------
+ Seq Scan on uk_t t1
+(1 row)
+
+-- outer join makes null values so distinct can't be a no-op.
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 RIGHT JOIN uk_t t2 ON t1.e = t2.id;
+              QUERY PLAN               
+---------------------------------------
+ HashAggregate
+   Group Key: t1.id
+   ->  Hash Right Join
+         Hash Cond: (t1.e = t2.id)
+         ->  Seq Scan on uk_t t1
+         ->  Hash
+               ->  Seq Scan on uk_t t2
+(7 rows)
+
+-- single row case.
+-- single row is unqiue all the time.
+-- case 1:
+-- t1.d is single row at base rel level, and because of t1.e = t2.id, it is
+-- still single row after join.
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.d FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id and t1.id = 1;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Nested Loop
+   ->  Index Scan using uk_t_pkey on uk_t t1
+         Index Cond: (id = 1)
+   ->  Index Only Scan using uk_t_pkey on uk_t t2
+         Index Cond: (id = t1.e)
+(5 rows)
+
+-- case 2
+-- any uniquekey which join with single row whose uniqueness can be kept.
+-- t1.d is single row at base rel level because of t1.id = 1
+-- so t2's uniquekey can be kept when join with t2.any_column.
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id FROM uk_t t1 JOIN uk_t t2 ON t2.e = t1.e and t1.id = 1;
+                    QUERY PLAN                     
+---------------------------------------------------
+ Hash Join
+   Hash Cond: (t2.e = t1.e)
+   ->  Seq Scan on uk_t t2
+   ->  Hash
+         ->  Index Scan using uk_t_pkey on uk_t t1
+               Index Cond: (id = 1)
+(6 rows)
+
+insert into uk_t SELECT 1, 2, 3, 4, 5, 6;
+-- combined uniquekey cases.
+-- the combinations of uniquekeys from the 2 sides is still unique
+-- no matter the join method and join clauses.
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true;
+           QUERY PLAN            
+---------------------------------
+ Nested Loop
+   ->  Seq Scan on uk_t t1
+   ->  Materialize
+         ->  Seq Scan on uk_t t2
+(4 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true;
+ id | id 
+----+----
+  1 |  1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false;
+ id | id 
+----+----
+(0 rows)
+
+-- BAD CASE: the distinct should be marked as noop in the below 4 cases, but
+-- since it is nullable by outer join, so it can't be removed. However the rule
+-- here should be if both uniquekey are not nullable, the combinations of them
+-- is not *mutli* nullable even by outer join. It is not clear to me how to
+-- implement it since I am just feeling not maintaining the not-null stuff
+-- during the joins is pretty amazing, otherwise there are too many stuff to
+-- do to cover this special case.
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true;
+              QUERY PLAN               
+---------------------------------------
+ HashAggregate
+   Group Key: t2.id, t1.id
+   ->  Nested Loop Left Join
+         ->  Seq Scan on uk_t t1
+         ->  Materialize
+               ->  Seq Scan on uk_t t2
+(6 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true;
+ id | id 
+----+----
+  1 |  1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false;
+              QUERY PLAN              
+--------------------------------------
+ HashAggregate
+   Group Key: id, t1.id
+   ->  Nested Loop Left Join
+         Join Filter: false
+         ->  Seq Scan on uk_t t1
+         ->  Result
+               One-Time Filter: false
+(7 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false;
+ id | id 
+----+----
+    |  1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true;
+              QUERY PLAN               
+---------------------------------------
+ HashAggregate
+   Group Key: t2.id, t1.id
+   ->  Merge Full Join
+         ->  Seq Scan on uk_t t1
+         ->  Materialize
+               ->  Seq Scan on uk_t t2
+(6 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true;
+ id | id 
+----+----
+  1 |  1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false;
+                 QUERY PLAN                  
+---------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: t2.id, t1.id
+         ->  Merge Full Join
+               Join Filter: false
+               ->  Seq Scan on uk_t t1
+               ->  Materialize
+                     ->  Seq Scan on uk_t t2
+(8 rows)
+
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false;
+ id | id 
+----+----
+  1 |   
+    |  1
+(2 rows)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index f0987ff537..1362720e36 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -62,7 +62,7 @@ test: sanity_check
 # join depends on create_misc
 # ----------
 test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update delete namespace prepared_xacts
-
+test: uniquekey
 # ----------
 # Another group of parallel tests
 # ----------
diff --git a/src/test/regress/sql/uniquekey.sql b/src/test/regress/sql/uniquekey.sql
new file mode 100644
index 0000000000..4db0213a04
--- /dev/null
+++ b/src/test/regress/sql/uniquekey.sql
@@ -0,0 +1,104 @@
+CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int);
+
+explain (costs off)
+select distinct id from uk_t;
+
+explain (costs off)
+select distinct e from uk_t where id = e;
+
+create unique index on uk_t (a, b);
+create unique index on uk_t (c, d);
+
+explain (costs off)
+select distinct a, b from uk_t;
+
+explain (costs off)
+select distinct c, d from uk_t;
+
+explain (costs off)
+select distinct c, d from uk_t
+where c > 0 and d > 0;
+
+explain (costs off)
+select distinct d from uk_t
+where c > 1 and d > 0;
+
+explain (costs off)
+select distinct d from uk_t
+where c = 1 and d > 0;
+
+
+-- test the join case --
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id;
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b;
+
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.id;
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b;
+
+-- outer join makes null values so distinct can't be a no-op.
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.id FROM uk_t t1 RIGHT JOIN uk_t t2 ON t1.e = t2.id;
+
+-- single row case.
+
+-- single row is unqiue all the time.
+
+-- case 1:
+-- t1.d is single row at base rel level, and because of t1.e = t2.id, it is
+-- still single row after join.
+EXPLAIN (COSTS OFF)
+SELECT distinct t1.d FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id and t1.id = 1;
+
+-- case 2
+-- any uniquekey which join with single row whose uniqueness can be kept.
+-- t1.d is single row at base rel level because of t1.id = 1
+-- so t2's uniquekey can be kept when join with t2.any_column.
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id FROM uk_t t1 JOIN uk_t t2 ON t2.e = t1.e and t1.id = 1;
+
+insert into uk_t SELECT 1, 2, 3, 4, 5, 6;
+
+-- combined uniquekey cases.
+-- the combinations of uniquekeys from the 2 sides is still unique
+-- no matter the join method and join clauses.
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true;
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true;
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false;
+SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false;
+
+
+-- BAD CASE: the distinct should be marked as noop in the below 4 cases, but
+-- since it is nullable by outer join, so it can't be removed. However the rule
+-- here should be if both uniquekey are not nullable, the combinations of them
+-- is not *mutli* nullable even by outer join. It is not clear to me how to
+-- implement it since I am just feeling not maintaining the not-null stuff
+-- during the joins is pretty amazing, otherwise there are too many stuff to
+-- do to cover this special case.
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true;
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true;
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false;
+SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false;
+
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true;
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true;
+
+EXPLAIN (COSTS OFF)
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false;
+SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false;
-- 
2.34.1

