diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 42dcb11..93a8750 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -21,6 +21,7 @@
 #include "access/sysattr.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 129fc3d..d2e8e7a 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -27,9 +27,23 @@
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
 #include "optimizer/var.h"
+#include "optimizer/clauses.h"
+#include "parser/parsetree.h"
+#include "optimizer/tlist.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pg_list.h"
+#include "utils/lsyscache.h"
 
 /* local functions */
-static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool semijoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool sortclause_is_unique_on_restrictinfo(Query *query,
+					  List *clause_list, List *sortclause);
+static bool relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel,
+					  RelOptInfo *referencedrel, List *referencing_exprs,
+					  List *index_exprs, List *operator_list);
+static bool expressions_match_foreign_key(ForeignKeyInfo *fk, IndexOptInfo *ind,
+					  List *exprlist, List *index_exprs, List *operator_list);
 static void remove_rel_from_query(PlannerInfo *root, int relid,
 					  Relids joinrelids);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
@@ -59,10 +73,20 @@ restart:
 		int			innerrelid;
 		int			nremoved;
 
-		/* Skip if not removable */
-		if (!join_is_removable(root, sjinfo))
-			continue;
-
+		if (sjinfo->jointype == JOIN_LEFT)
+		{
+			/* Skip if not removable */
+			if (!leftjoin_is_removable(root, sjinfo))
+				continue;
+		}
+		else if (sjinfo->jointype == JOIN_SEMI)
+		{
+			/* Skip if not removable */
+			if (!semijoin_is_removable(root, sjinfo))
+				continue;
+		}
+		else
+			continue; /* we don't support this join type */
 		/*
 		 * Currently, join_is_removable can only succeed when the sjinfo's
 		 * righthand is a single baserel.  Remove that rel from the query and
@@ -132,47 +156,75 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
 }
 
 /*
- * join_is_removable
- *	  Check whether we need not perform this special join at all, because
+ * leftjoin_is_removable
+ *	  Check whether we need not perform this left join at all, because
  *	  it will just duplicate its left input.
  *
  * This is true for a left join for which the join condition cannot match
- * more than one inner-side row.  (There are other possibly interesting
- * cases, but we don't have the infrastructure to prove them.)  We also
- * have to check that the inner side doesn't generate any variables needed
- * above the join.
+ * more than one inner-side row. To prove the join will be unique on the
+ * join condition we must analyze the unique indexes on the right side of
+ * the join to ensure that no more than 1 record can exist for the join
+ * condition.
+ *
+ * We can also remove sub queries if we can prove the query will not produce
+ * more than 1 record for the join condition, to do this we currently look at
+ * the GROUP BY and DISTINCT clause of the query.
  */
 static bool
-join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	int			innerrelid;
 	RelOptInfo *innerrel;
+	Query	   *subquery;
 	Relids		joinrelids;
 	List	   *clause_list = NIL;
 	ListCell   *l;
 	int			attroff;
+	List	   *fklist = NIL;
+
+	Assert(sjinfo->jointype == JOIN_LEFT);
 
 	/*
 	 * Currently, we only know how to remove left joins to a baserel with
-	 * unique indexes.  We can check most of these criteria pretty trivially
-	 * to avoid doing useless extra work.  But checking whether any of the
-	 * indexes are unique would require iterating over the indexlist, so for
-	 * now we just make sure there are indexes of some sort or other.  If none
-	 * of them are unique, join removal will still fail, just slightly later.
+	 * unique indexes and left joins to a subquery where the subquery is
+	 * unique on the join condition. We can check most of these criteria
+	 * pretty trivially to avoid doing useless extra work.  But checking
+	 * whether any of the indexes are unique would require iterating over
+	 * the indexlist, so for now, if we're joining to a relation, we'll just
+	 * ensure that we have at least 1 index, it won't matter if that index
+	 * is unique at this stage, we'll check those details later.
 	 */
-	if (sjinfo->jointype != JOIN_LEFT ||
-		sjinfo->delay_upper_joins ||
+	if (sjinfo->delay_upper_joins ||
 		bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
 		return false;
 
 	innerrelid = bms_singleton_member(sjinfo->min_righthand);
 	innerrel = find_base_rel(root, innerrelid);
 
-	if (innerrel->reloptkind != RELOPT_BASEREL ||
-		innerrel->rtekind != RTE_RELATION ||
-		innerrel->indexlist == NIL)
+	if (innerrel->reloptkind != RELOPT_BASEREL)
 		return false;
 
+	if (innerrel->rtekind == RTE_RELATION)
+	{
+		if (innerrel->indexlist == NIL)
+			return false; /* no possibility of a unique index */
+	}
+	else if (innerrel->rtekind == RTE_SUBQUERY)
+	{
+		subquery = root->simple_rte_array[innerrelid]->subquery;
+
+		/*
+		 * The only means we currently use to check if the subquery is unique
+		 * are the GROUP BY and DISTINCT clause. If both of these are empty
+		 * then there's no point in going any further.
+		 */
+		if (subquery->groupClause == NIL &&
+			subquery->distinctClause == NIL)
+			return false;
+	}
+	else
+		return false; /* unsupported rtekind */
+
 	/* Compute the relid set for the join we are considering */
 	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
 
@@ -275,17 +327,437 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 * clauses for the innerrel, so we needn't do that here.
 	 */
 
-	/* Now examine the indexes to see if we have a matching unique index */
-	if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
-		return true;
+	/*
+	 * Now examine the indexes to see if we have a matching unique index.*/
+	if (innerrel->rtekind == RTE_RELATION)
+		return relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL);
 
 	/*
+	 * We can be certain that the sub query contains no duplicate values for
+	 * the join clause if item in the sub query's GROUP BY clause is also used
+	 * in the join clause using equality. This works the same way for the
+	 * DISTINCT clause. We need not pay any attention to WHERE or HAVING
+	 * clauses as these just restrict the results more and could not be the
+	 * cause of duplication in the result set. However there are a number of
+	 * pre-checks we must perform which could cause duplicate values even if
+	 * all the required columns are in the GROUP BY or DISTINCT clause.
+	 *
+	 * NB: We must also not remove the join in the subquery contains a
+	 * FOR UDPATE clause, but we can actually skip this check as GROUP BY and
+	 * DISTINCT cannot be used at the same time as FOR UPDATE.
+	 */
+	if (innerrel->rtekind == RTE_SUBQUERY)
+	{
+		Assert(subquery == root->simple_rte_array[innerrelid]->subquery);
+
+		/*
+		 * We cannot remove the subquery if the target list contains any set
+		 * returning functions as these may cause the query not to be unique
+		 * on the grouping columns, as per the following example:
+		 * "SELECT a.a,generate_series(1,2) FROM (VALUES(1)) a(a) GROUP BY a"
+		 */
+		if (expression_returns_set((Node *) subquery->targetList))
+			return false;
+
+		/*
+		 * Don't remove the join if the target list contains any volatile
+		 * functions. Doing so may remove desired side affects that calls
+		 * to the function may cause.
+		 */
+		if (contain_volatile_functions((Node *) subquery->targetList))
+			return false;
+
+		/*
+		 * It should be safe to remove the join if all the GROUP BY expressions
+		 * have matching items in the join condition.
+		 */
+		if (subquery->groupClause != NIL &&
+			sortclause_is_unique_on_restrictinfo(subquery, clause_list, subquery->groupClause))
+			return true;
+
+		/*
+		 * It should be safe to remove the join if all the DISTINCT column list have matching
+		 * items in the join condition.
+		 */
+		if (subquery->distinctClause != NIL &&
+			sortclause_is_unique_on_restrictinfo(subquery, clause_list, subquery->distinctClause))
+			return true;
+	}
+	/* XXX is this comment still needed??
 	 * Some day it would be nice to check for other methods of establishing
 	 * distinctness.
 	 */
 	return false;
 }
 
+/*
+ * semijoin_is_removable
+ *	  Check if we can remove this semi join.
+ *
+ * To prove that a semi join is redundant we have to ensure that a foreign key
+ * exists on the left side of the join which references the table at the right
+ * side of the join. This means that we can only support a single table on
+ * either side of the join. We must also ensure that the join condition matches
+ * all the foreign key columns to each index column on the referenced table. If
+ * any columns are missing then we cannot be sure we'll get exactly 1 record back,
+ * and if there are any extra conditions that are not in the foreign key then we
+ * cannot be sure that the join condition will produce at least 1 matching row.
+ */
+static bool
+semijoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+{
+	int			innerrelid;
+	int			outerrelid;
+	RelOptInfo *innerrel;
+	RelOptInfo *outerrel;
+	ListCell   *lc;
+	List	   *referencing_exprs;
+	List	   *index_exprs;
+	List	   *operator_list;
+
+	Assert(sjinfo->jointype == JOIN_SEMI);
+
+	if (sjinfo->delay_upper_joins ||
+		bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
+		return false;
+
+	innerrelid = bms_singleton_member(sjinfo->min_righthand);
+	innerrel = find_base_rel(root, innerrelid);
+
+	if (innerrel->reloptkind != RELOPT_BASEREL ||
+		innerrel->rtekind != RTE_RELATION ||
+		innerrel->indexlist == NIL ||
+		bms_membership(sjinfo->min_lefthand) != BMS_SINGLETON)
+		return false;
+
+	/*
+	 * To allow the removal of a SEMI JOIN we must analyze the foreign
+	 * keys of the relation on the left side of the join, for this to work
+	 * we'll need to ensure that there is only 1 relation on the left side
+	 * of the joins, otherwise there's no possibility of foreign keys.
+	 * If the relation on the left side has no foreign keys then there's
+	 * no possibility that the join can be removed.
+	 */
+
+	outerrelid = bms_singleton_member(sjinfo->min_lefthand);
+	outerrel = find_base_rel(root, outerrelid);
+
+	/* No possibility to remove the join if there's no foreign keys */
+	if (outerrel->fklist == NIL)
+		return false;
+
+	referencing_exprs = NIL;
+	index_exprs = NIL;
+	operator_list = NIL;
+
+	foreach(lc, sjinfo->join_quals)
+	{
+		OpExpr	   *op = (OpExpr *) lfirst(lc);
+		Oid			opno;
+		Node	   *left_expr;
+		Node	   *right_expr;
+		Relids		left_varnos;
+		Relids		right_varnos;
+		Relids		all_varnos;
+		Oid			opinputtype;
+
+		/* Is it a binary opclause? */
+		if (!IsA(op, OpExpr) ||
+			list_length(op->args) != 2)
+		{
+			/* No, but does it reference both sides? */
+			all_varnos = pull_varnos((Node *) op);
+			if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
+				bms_is_subset(all_varnos, sjinfo->syn_righthand))
+			{
+				/*
+				 * Clause refers to only one rel, so ignore it --- unless it
+				 * contains volatile functions, in which case we'd better
+				 * punt.
+				 */
+				if (contain_volatile_functions((Node *) op))
+					return false;
+				continue;
+			}
+			/* Non-operator clause referencing both sides, must punt */
+			return false;
+		}
+
+		/* Extract data from binary opclause */
+		opno = op->opno;
+		left_expr = linitial(op->args);
+		right_expr = lsecond(op->args);
+		left_varnos = pull_varnos(left_expr);
+		right_varnos = pull_varnos(right_expr);
+		all_varnos = bms_union(left_varnos, right_varnos);
+		opinputtype = exprType(left_expr);
+
+		/* Does it reference both sides? */
+		if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
+			bms_is_subset(all_varnos, sjinfo->syn_righthand))
+		{
+			/*
+			 * Clause refers to only one rel, so ignore it --- unless it
+			 * contains volatile functions, in which case we'd better punt.
+			 */
+			if (contain_volatile_functions((Node *) op))
+				return false;
+			continue;
+		}
+
+		/* check rel membership of arguments */
+		if (!bms_is_empty(right_varnos) &&
+			bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
+			!bms_overlap(left_varnos, sjinfo->syn_righthand))
+		{
+			/* typical case, right_expr is RHS variable */
+		}
+		else if (!bms_is_empty(left_varnos) &&
+				 bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
+				 !bms_overlap(right_varnos, sjinfo->syn_righthand))
+		{
+			Node *tmp;
+			/* flipped case, left_expr is RHS variable */
+			opno = get_commutator(opno);
+			if (!OidIsValid(opno))
+				return false;
+
+			/* swap the operands */
+			tmp = left_expr;
+			left_expr = right_expr;
+			right_expr = tmp;
+		}
+		else
+			return false;
+
+		/* so far so good, keep building lists */
+		referencing_exprs = lappend(referencing_exprs, copyObject(left_expr));
+		operator_list = lappend_oid(operator_list, opno);
+		index_exprs = lappend(index_exprs, copyObject(right_expr));
+	}
+
+	if (referencing_exprs == NIL)
+		return false;
+
+	/* The expressions mustn't be volatile. */
+	if (contain_volatile_functions((Node *) referencing_exprs))
+		return false;
+
+	if (contain_volatile_functions((Node *) index_exprs))
+		return false;
+
+	return relation_has_foreign_key_for(root, outerrel, innerrel,
+			referencing_exprs, index_exprs, operator_list);
+}
+
+/*
+ * relation_has_foreign_key_for
+ *	  Checks if rel has a foreign key which references referencedrel with the
+ *	  given list of expressions.
+ *
+ *	For the match to succeed:
+ *	  referencing_exprs must match the columns defined in the foreign key
+ *	  index_exprs must match the columns defined in the index for the foreign key.
+ */
+static bool
+relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel,
+			RelOptInfo *referencedrel, List *referencing_exprs,
+			List *index_exprs, List *operator_list)
+{
+	ListCell *lc;
+
+	Assert(list_length(referencing_exprs) == list_length(index_exprs));
+	Assert(list_length(referencing_exprs) == list_length(operator_list));
+
+	/*
+	 * Short-circuit if no foreign keys exist on the relation or
+	 * there are no indexes on the referenced relation. Remember that
+	 * it is possible for the fklist to not be empty and the indexlist
+	 * to be empty as the foreign keys may be for some completely other
+	 * relation.
+	 */
+	if (rel->fklist == NIL || referencedrel->indexlist == NIL)
+		return false;
+
+	/*
+	 * Here we must look at each foreign key which is defined and see if we
+	 * can find that foreign key's index in the index list of the referenced
+	 * table. When we find a match we do some quick pre-checks on the index
+	 * then we try to see if all of the expressions can be matched to that
+	 * foreign key and index. If we don't match then we'll keep trying to
+	 * find another matching foreign key and index list.
+	 */
+	foreach(lc, rel->fklist)
+	{
+		ForeignKeyInfo *fk = (ForeignKeyInfo *) lfirst(lc);
+		ListCell *ic;
+
+		/*
+		 * We need to ensure that if the number of columns in the key is above zero
+		 * that the foreign key is of type MATCH FULL. XXX is this overly strict??
+		 */
+		if (fk->conncols > 1 && fk->confmatchtype != FKCONSTR_MATCH_FULL)
+			continue;
+
+		foreach(ic, referencedrel->indexlist)
+		{
+			IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
+			if (fk->conindid == ind->indexoid)
+			{
+				/* Sanity check? XXX Should we complain or just skip this one? */
+				if (fk->conncols != ind->ncolumns)
+					elog(ERROR, "Number of columns in foreign key does not match number of indexed columns");
+
+				/* Index not ready? XXX Perhaps this should be an error as we
+				 * should only have fks that have been validated.
+				 */
+				if (!ind->unique || !ind->immediate ||
+					(ind->indpred != NIL && !ind->predOK))
+					continue;
+
+				if (expressions_match_foreign_key(fk, ind, referencing_exprs, index_exprs, operator_list))
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+static bool
+expressions_match_foreign_key(ForeignKeyInfo *fk, IndexOptInfo *ind,
+			List *exprlist, List *index_exprs, List *operator_list)
+{
+	ListCell *lc;
+	ListCell *lc2;
+	ListCell *lc3;
+	int col;
+
+	Assert(list_length(exprlist) == list_length(index_exprs));
+	Assert(list_length(exprlist) == list_length(operator_list));
+
+	/*
+	 * For each column defined in the foreign key we must ensure that we find
+	 * a qual in the expression list which matches the foreign key on one side
+	 * of the expression and the index on the other side of the expression. It
+	 * does not matter if the same expression appears many times, we just need
+	 * to ensure all exist at least one and no extra non matching expressions
+	 * exist.
+	 */
+
+	/*
+	 * Fast path out if there's not enough conditions to match
+	 * each column in the foreign key. Note that we cannot check
+	 * that the number of expressions is equal here since it would
+	 * cause duplicate expressions to not match.
+	 */
+	if (list_length(exprlist) < fk->conncols)
+		return false;
+
+	forthree(lc, exprlist, lc2, index_exprs, lc3, operator_list)
+	{
+		Node	*expr = (Node *) lfirst(lc);
+		Node	*idxexpr = (Node *) lfirst(lc2);
+		Oid		opr = lfirst_oid(lc3);
+		bool matched = false;
+
+		/* if anything is NULL or not a var then we can it's not a match */
+		if (!expr || !IsA(expr, Var) || !idxexpr || !IsA(idxexpr, Var))
+			return false;
+
+		for (col = 0; col < fk->conncols; col++)
+		{
+			if (fk->conkey[col] == ((Var *) expr)->varattno &&
+				fk->confkey[col] == ((Var *) idxexpr)->varattno &&
+				opr == fk->conpfeqop[col])
+			{
+				matched = true;
+				break;
+			}
+		}
+
+		/*
+		 * Did we find anything matching the fk col? If not then we'll
+		 * return a no match.
+		 */
+		if (!matched)
+			return false;
+	}
+
+	return true;
+}
+
+
+/*
+ * sortclause_is_unique_on_restrictinfo
+ *
+ * Checks to see if all items in sortclause also exist in clause_list.
+ * The function will return true if clause_list is the same as or a superset
+ * of the sortclause. If the sortclause has columns that don't exist in the
+ * clause_list then the query can't be guaranteed unique on the clause_list
+ * columns.
+ *
+ * Note: The calling function must ensure that sortclause is not NIL.
+ */
+static bool
+sortclause_is_unique_on_restrictinfo(Query *query, List *clause_list, List *sortclause)
+{
+	ListCell *l;
+
+	Assert(sortclause != NIL);
+
+	/*
+	 * Loop over each sort clause to ensure that we have
+	 * an item in the join conditions that matches it.
+	 * It does not matter if we have more items in the join
+	 * condition than we have in the sort clause.
+	 */
+	foreach(l, sortclause)
+	{
+		ListCell		*ri;
+		SortGroupClause *scl = (SortGroupClause *) lfirst(l);
+		TargetEntry		*sortTarget;
+		bool			 matched = false;
+
+		/* lookup the target list entry for the current sort sort group ref */
+		sortTarget = get_sortgroupref_tle(scl->tleSortGroupRef, query->targetList);
+
+		/*
+		 * Since a constant only has 1 value the existence of one here will
+		 * not cause any duplication of the results. We'll simply ignore it!
+		 */
+		if (IsA(sortTarget->expr, Const))
+			continue;
+
+		foreach(ri, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(ri);
+			Node	   *rexpr;
+
+			if (rinfo->outer_is_left)
+				rexpr = get_rightop(rinfo->clause);
+			else
+				rexpr = get_leftop(rinfo->clause);
+
+			if (IsA(rexpr, Var))
+			{
+				Var *var = (Var *)rexpr;
+
+				if (var->varattno == sortTarget->resno)
+				{
+					matched = true;
+					break; /* match found */
+				}
+			}
+			else /* XXX what else could it be? */
+				return false;
+		}
+
+		if (!matched)
+			return false;
+	}
+	return true;
+}
 
 /*
  * Remove the target relid from the planner's data structures, having
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index b2becfa..ac7b38b 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -26,6 +26,8 @@
 #include "access/xlog.h"
 #include "catalog/catalog.h"
 #include "catalog/heap.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_type.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -38,6 +40,7 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
 #include "storage/bufmgr.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
@@ -384,6 +387,123 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 	heap_close(relation, NoLock);
 
+
+	{
+		List	   *result = NIL;
+		Relation	fkeyRel;
+		Relation	fkeyRelIdx;
+		ScanKeyData fkeyScankey;
+		SysScanDesc fkeyScan;
+		HeapTuple	tuple;
+
+		/* ConstraintRelidIndexId
+		 * Must scan pg_constraint.  Right now, it is a seqscan because there is
+		 * no available index on conrelid.
+		 */
+		ScanKeyInit(&fkeyScankey,
+			Anum_pg_constraint_conrelid,
+			BTEqualStrategyNumber, F_OIDEQ,
+			ObjectIdGetDatum(relationObjectId));
+
+		fkeyRel = heap_open(ConstraintRelationId, AccessShareLock);
+		fkeyRelIdx = index_open(ConstraintRelidIndexId, AccessShareLock);
+		fkeyScan = systable_beginscan_ordered(fkeyRel, fkeyRelIdx, NULL, 1, &fkeyScankey);
+
+		while ((tuple = systable_getnext_ordered(fkeyScan, ForwardScanDirection)) != NULL)
+		{
+			Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+			ForeignKeyInfo *fkinfo;
+			Datum		adatum;
+			bool		isNull;
+			ArrayType  *arr;
+			int			numkeys;
+
+			/* Not a foreign key */
+			if (con->contype != CONSTRAINT_FOREIGN)
+				continue;
+
+			/* we're not interested unless the fk has been validated */
+			if (!con->convalidated)
+				continue;
+
+			fkinfo = (ForeignKeyInfo *) palloc(sizeof(ForeignKeyInfo));
+			fkinfo->conindid = con->conindid;
+			fkinfo->confrelid = con->confrelid;
+			fkinfo->convalidated = con->convalidated;
+			fkinfo->conrelid = con->conrelid;
+			fkinfo->confupdtype = con->confupdtype;
+			fkinfo->confdeltype = con->confdeltype;
+			fkinfo->confmatchtype = con->confmatchtype;
+
+			adatum = heap_getattr(tuple, Anum_pg_constraint_conkey,
+				RelationGetDescr(fkeyRel), &isNull);
+			if (isNull)
+				elog(ERROR, "null conkey for constraint %u",
+				HeapTupleGetOid(tuple));
+			arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+			numkeys = ARR_DIMS(arr)[0];
+			if (ARR_NDIM(arr) != 1 ||
+				numkeys < 0 ||
+				ARR_HASNULL(arr) ||
+				ARR_ELEMTYPE(arr) != INT2OID)
+				elog(ERROR, "conkey is not a 1-D smallint array");
+
+			fkinfo->conkey = (int16 *) ARR_DATA_PTR(arr);
+
+			fkinfo->conncols = numkeys;
+
+			adatum = heap_getattr(tuple, Anum_pg_constraint_confkey,
+				RelationGetDescr(fkeyRel), &isNull);
+			if (isNull)
+				elog(ERROR, "null confkey for constraint %u",
+				HeapTupleGetOid(tuple));
+			arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+			numkeys = ARR_DIMS(arr)[0];
+
+			/* sanity check */
+			if (numkeys != fkinfo->conncols)
+				elog(ERROR, "number of confkey elements does not equal conkey elements");
+
+			if (ARR_NDIM(arr) != 1 ||
+				numkeys < 0 ||
+				ARR_HASNULL(arr) ||
+				ARR_ELEMTYPE(arr) != INT2OID)
+				elog(ERROR, "confkey is not a 1-D smallint array");
+
+			fkinfo->confkey = (int16 *) ARR_DATA_PTR(arr);
+
+			adatum = heap_getattr(tuple, Anum_pg_constraint_conpfeqop,
+				RelationGetDescr(fkeyRel), &isNull);
+			if (isNull)
+				elog(ERROR, "null conpfeqop for constraint %u",
+				HeapTupleGetOid(tuple));
+			arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+			numkeys = ARR_DIMS(arr)[0];
+
+			/* sanity check */
+			if (numkeys != fkinfo->conncols)
+				elog(ERROR, "number of conpfeqop elements does not equal conkey elements");
+
+			if (ARR_NDIM(arr) != 1 ||
+				numkeys < 0 ||
+				ARR_HASNULL(arr) ||
+				ARR_ELEMTYPE(arr) != OIDOID)
+				elog(ERROR, "conpfeqop is not a 1-D smallint array");
+
+			fkinfo->conpfeqop = (Oid *) ARR_DATA_PTR(arr);
+
+			result = lappend(result, fkinfo);
+		}
+
+		rel->fklist = result;
+
+		systable_endscan_ordered(fkeyScan);
+		index_close(fkeyRelIdx, AccessShareLock);
+		heap_close(fkeyRel, AccessShareLock);
+	}
+
+
+
 	/*
 	 * Allow a plugin to editorialize on the info we obtained from the
 	 * catalogs.  Actions might include altering the assumed relation size,
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index c938c27..a0fb8eb 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -115,6 +115,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
 	rel->lateral_relids = NULL;
 	rel->lateral_referencers = NULL;
 	rel->indexlist = NIL;
+	rel->fklist = NIL;
 	rel->pages = 0;
 	rel->tuples = 0;
 	rel->allvisfrac = 0;
@@ -377,6 +378,7 @@ build_join_rel(PlannerInfo *root,
 	joinrel->lateral_relids = NULL;
 	joinrel->lateral_referencers = NULL;
 	joinrel->indexlist = NIL;
+	joinrel->fklist = NIL;
 	joinrel->pages = 0;
 	joinrel->tuples = 0;
 	joinrel->allvisfrac = 0;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 300136e..3deb59b 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -445,6 +445,7 @@ typedef struct RelOptInfo
 	Relids		lateral_relids; /* minimum parameterization of rel */
 	Relids		lateral_referencers;	/* rels that reference me laterally */
 	List	   *indexlist;		/* list of IndexOptInfo */
+	List	   *fklist;			/* list of ForeignKeyInfo */
 	BlockNumber pages;			/* size estimates derived from pg_class */
 	double		tuples;
 	double		allvisfrac;
@@ -1643,4 +1644,20 @@ typedef struct JoinCostWorkspace
 	int			numbatches;
 } JoinCostWorkspace;
 
+typedef struct ForeignKeyInfo
+{
+	Oid			conindid;		/* index supporting this constraint */
+	Oid			confrelid;		/* relation referenced by foreign key */
+	bool		convalidated;	/* constraint has been validated? */
+	Oid			conrelid;		/* relation this constraint constrains */
+	char		confupdtype;	/* foreign key's ON UPDATE action */
+	char		confdeltype;	/* foreign key's ON DELETE action */
+	char		confmatchtype;	/* foreign key's match type */
+	int			conncols;		/* number of columns references */
+	int16	   *conkey;			/* Columns of conrelid that the constraint applies to */
+	int16	   *confkey;		/* columns of confrelid that foreign key references */
+	Oid		   *conpfeqop;		/* Operator list for comparing PK to FK */
+} ForeignKeyInfo;
+
+
 #endif   /* RELATION_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 934488a..fd8e048 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3060,9 +3060,11 @@ begin;
 CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
 CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
 CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a INT, b INT);
 INSERT INTO a VALUES (0, 0), (1, NULL);
 INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3),(2,2),(3,1);
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
   QUERY PLAN   
@@ -3098,7 +3100,331 @@ select id from a where id in (
          ->  Seq Scan on b
 (5 rows)
 
+-- check optimization of outer join when joining a unique sub query using group by
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id) b ON a.id = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check optimization of outer join when joining a unique sub query using distinct
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id FROM b) b ON a.id = b.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check optimization of outer join when joining a unique sub query using distinct
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b AS ab FROM d) d ON a.id = d.ab;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- optimization is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b+random() AS abr FROM d) d ON a.id = d.abr;
+                                        QUERY PLAN                                        
+------------------------------------------------------------------------------------------
+ Hash Left Join
+   Hash Cond: ((a.id)::double precision = ((((d.a + d.b))::double precision + random())))
+   ->  Seq Scan on a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: (((d.a + d.b))::double precision + random())
+               ->  Seq Scan on d
+(7 rows)
+
+-- check optimization of outer join when joining a unique sub query using distinct
+-- and a constant expression.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id,1 AS dummy FROM b) b ON a.id = b.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id + 10 = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+-- and contains a redundant join clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,1 as dummy FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id AND b.dummy = 1;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- optimization is not possible when the group by contains a column which is not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id,b.c_id) b ON a.id = b.id;
+           QUERY PLAN            
+---------------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id, b.c_id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- optimization is not possible when distinct clause contains an item that is not in the join clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,c_id FROM b) b ON a.id = b.id;
+           QUERY PLAN            
+---------------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id, b.c_id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- optimization is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,random() AS r FROM b) b ON a.id = b.id AND r = random();
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Left Join
+   Hash Cond: (a.id = b.id)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Subquery Scan on b
+               Filter: (b.r = random())
+               ->  HashAggregate
+                     Group Key: b_1.id, random()
+                     ->  Seq Scan on b b_1
+(9 rows)
+
+-- optimization is not possible when there are any volatile functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,AVG(c_id),SUM(random()) FROM b GROUP BY id) b ON a.id = b.id;
+         QUERY PLAN         
+----------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
+-- optimization is not possible when there are set returning functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,generate_series(1,2) FROM b GROUP BY id) b ON a.id = b.id;
+         QUERY PLAN         
+----------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.id)
+   ->  HashAggregate
+         Group Key: b.id
+         ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(7 rows)
+
 rollback;
+BEGIN;
+-- Test join removals for semi joins
+CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY);
+CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT NOT NULL REFERENCES b(id));
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id IN(SELECT id FROM b);
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id);
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- should remove semi join to b (swapped condition order)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id);
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- should not remove semi join (since not using equals)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id);
+               QUERY PLAN                
+-----------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on a
+   ->  Index Only Scan using b_pkey on b
+         Index Cond: (id >= a.b_id)
+(4 rows)
+
+-- should not remove semi join
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b);
+             QUERY PLAN             
+------------------------------------
+ Hash Semi Join
+   Hash Cond: ((a.b_id + 0) = b.id)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+(5 rows)
+
+-- should not remove semi join
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b);
+             QUERY PLAN             
+------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.b_id = (b.id + 0))
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+(5 rows)
+
+-- should not remove semi join (wrong column)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE id IN(SELECT id FROM b);
+         QUERY PLAN         
+----------------------------
+ Hash Join
+   Hash Cond: (b.id = a.id)
+   ->  Seq Scan on b
+   ->  Hash
+         ->  Seq Scan on a
+(5 rows)
+
+ROLLBACK;
+BEGIN;
+-- Semi join removal code with 2 column foreign keys
+CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2));
+CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT NOT NULL, b_id2 INT NOT NULL);
+ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH FULL;
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2);
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
+-- should not remove semi join to b (extra condition)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2);
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: ((a.b_id1 = b.id1) AND (a.b_id2 = b.id2))
+   Join Filter: (a.b_id2 >= b.id2)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+(6 rows)
+
+-- should not remove semi join to b (wrong operator)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2);
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on a
+   ->  Index Only Scan using b_pkey on b
+         Index Cond: ((id1 < a.b_id1) AND (id2 > a.b_id2))
+(4 rows)
+
+-- should not remove semi join (only checking id1)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1);
+           QUERY PLAN            
+---------------------------------
+ Hash Join
+   Hash Cond: (a.b_id1 = b.id1)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.id1
+               ->  Seq Scan on b
+(7 rows)
+
+-- should not remove semi join (only checking id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2);
+           QUERY PLAN            
+---------------------------------
+ Hash Join
+   Hash Cond: (a.b_id2 = b.id2)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.id2
+               ->  Seq Scan on b
+(7 rows)
+
+-- should not remove semi join (checking wrong columns)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2);
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.b_id2 = b.id1) AND (a.b_id1 = b.id2))
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+(5 rows)
+
+-- should not remove semi join (no check for id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1);
+               QUERY PLAN                
+-----------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on a
+         Filter: (b_id2 = b_id1)
+   ->  Index Only Scan using b_pkey on b
+         Index Cond: (id1 = a.b_id2)
+(5 rows)
+
+-- should not remove semi join (no check for b_id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2);
+            QUERY PLAN             
+-----------------------------------
+ Hash Join
+   Hash Cond: (a.b_id1 = b.id1)
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+               Filter: (id1 = id2)
+(6 rows)
+
+ROLLBACK;
 create temp table parent (k int primary key, pd int);
 create temp table child (k int unique, cd int);
 insert into parent values (1, 10), (2, 20), (3, 30);
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 275cb11..984a24f 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -861,9 +861,11 @@ begin;
 CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
 CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
 CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a INT, b INT);
 INSERT INTO a VALUES (0, 0), (1, NULL);
 INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3),(2,2),(3,1);
 
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
@@ -878,8 +880,142 @@ select id from a where id in (
 	select b.id from b left join c on b.id = c.id
 );
 
+-- check optimization of outer join when joining a unique sub query using group by
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id) b ON a.id = b.id;
+
+-- check optimization of outer join when joining a unique sub query using distinct
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id FROM b) b ON a.id = b.c_id;
+
+-- check optimization of outer join when joining a unique sub query using distinct
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b AS ab FROM d) d ON a.id = d.ab;
+
+-- optimization is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT a+b+random() AS abr FROM d) d ON a.id = d.abr;
+
+-- check optimization of outer join when joining a unique sub query using distinct
+-- and a constant expression.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.c_id,1 AS dummy FROM b) b ON a.id = b.c_id;
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id;
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id + 10 = b.id;
+
+-- check optimization of outer join when joining a unique sub query which contains 2 tables
+-- and contains a redundant join clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a
+LEFT JOIN (SELECT b.id,1 as dummy FROM b INNER JOIN c ON b.id = c.id GROUP BY b.id) b ON a.id = b.id AND b.dummy = 1;
+
+-- optimization is not possible when the group by contains a column which is not in the join condition.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT b.id FROM b GROUP BY b.id,b.c_id) b ON a.id = b.id;
+
+-- optimization is not possible when distinct clause contains an item that is not in the join clause
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,c_id FROM b) b ON a.id = b.id;
+
+-- optimization is not possible when distinct contains a volatile function
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT DISTINCT b.id,random() AS r FROM b) b ON a.id = b.id AND r = random();
+
+-- optimization is not possible when there are any volatile functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,AVG(c_id),SUM(random()) FROM b GROUP BY id) b ON a.id = b.id;
+
+-- optimization is not possible when there are set returning functions in the target list.
+EXPLAIN (COSTS OFF)
+SELECT a.id FROM a LEFT JOIN (SELECT id,generate_series(1,2) FROM b GROUP BY id) b ON a.id = b.id;
+
 rollback;
 
+BEGIN;
+
+-- Test join removals for semi joins
+CREATE TEMP TABLE b (id INT NOT NULL PRIMARY KEY);
+CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id INT NOT NULL REFERENCES b(id));
+
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id IN(SELECT id FROM b);
+
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id = id);
+
+-- should remove semi join to b (swapped condition order)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id = a.b_id);
+
+-- should not remove semi join (since not using equals)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE id >= a.b_id);
+
+-- should not remove semi join
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id+0 IN(SELECT id FROM b);
+
+-- should not remove semi join
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE b_id IN(SELECT id+0 FROM b);
+
+-- should not remove semi join (wrong column)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE id IN(SELECT id FROM b);
+
+ROLLBACK;
+
+BEGIN;
+
+-- Semi join removal code with 2 column foreign keys
+
+CREATE TEMP TABLE b (id1 INT NOT NULL, id2 INT NOT NULL, PRIMARY KEY(id1,id2));
+CREATE TEMP TABLE a (id INT NOT NULL PRIMARY KEY, b_id1 INT NOT NULL, b_id2 INT NOT NULL);
+
+ALTER TABLE a ADD CONSTRAINT a_b_id1_b_id2_fkey FOREIGN KEY (b_id1,b_id2) REFERENCES b(id1,id2) MATCH FULL;
+
+-- should remove semi join to b
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2);
+
+-- should not remove semi join to b (extra condition)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id2 = id2 AND a.b_id2 >= id2);
+
+-- should not remove semi join to b (wrong operator)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 > id1 AND a.b_id2 < id2);
+
+-- should not remove semi join (only checking id1)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1);
+
+-- should not remove semi join (only checking id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id2);
+
+-- should not remove semi join (checking wrong columns)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id2);
+
+-- should not remove semi join (no check for id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id2 = id1 AND a.b_id1 = id1);
+
+-- should not remove semi join (no check for b_id2)
+EXPLAIN (COSTS OFF)
+SELECT id FROM a WHERE EXISTS(SELECT 1 FROM b WHERE a.b_id1 = id1 AND a.b_id1 = id2);
+
+ROLLBACK;
+
 create temp table parent (k int primary key, pd int);
 create temp table child (k int unique, cd int);
 insert into parent values (1, 10), (2, 20), (3, 30);