diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 020558b..1723fd3 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2505,6 +2505,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 					SpecialJoinInfo *sjinfo,
 					SemiAntiJoinFactors *semifactors)
 {
+	JoinType	jointype = path->jpath.jointype;
 	Path	   *outer_path = path->jpath.outerjoinpath;
 	Path	   *inner_path = path->jpath.innerjoinpath;
 	double		outer_path_rows = outer_path->rows;
@@ -2560,6 +2561,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 		foreach(hcl, hashclauses)
 		{
 			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+			bool		keep_nulls = (jointype == JOIN_RIGHT
+									  || jointype == JOIN_FULL
+									  || !op_strict(restrictinfo->hashjoinoperator));
 			Selectivity thisbucketsize;
 
 			Assert(IsA(restrictinfo, RestrictInfo));
@@ -2583,7 +2587,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 					thisbucketsize =
 						estimate_hash_bucketsize(root,
 										   get_rightop(restrictinfo->clause),
-												 virtualbuckets);
+												 virtualbuckets,
+												 keep_nulls);
 					restrictinfo->right_bucketsize = thisbucketsize;
 				}
 			}
@@ -2599,7 +2604,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 					thisbucketsize =
 						estimate_hash_bucketsize(root,
 											get_leftop(restrictinfo->clause),
-												 virtualbuckets);
+												 virtualbuckets,
+												 keep_nulls);
 					restrictinfo->left_bucketsize = thisbucketsize;
 				}
 			}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1ba103c..e8660bc 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3435,9 +3435,13 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
  * discourage use of a hash rather strongly if the inner relation is large,
  * which is what we want.  We do not want to hash unless we know that the
  * inner rel is well-dispersed (or the alternatives seem much worse).
+ *
+ * If keep_nulls is true, we have to treat NULL as a real value, so adjust
+ * all results according to stanullfrac if available.
  */
 Selectivity
-estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
+estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets,
+						 bool keep_nulls)
 {
 	VariableStatData vardata;
 	double		estfract,
@@ -3454,13 +3458,6 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 	/* Get number of distinct values */
 	ndistinct = get_variable_numdistinct(&vardata, &isdefault);
 
-	/* If ndistinct isn't real, punt and return 0.1, per comments above */
-	if (isdefault)
-	{
-		ReleaseVariableStats(vardata);
-		return (Selectivity) 0.1;
-	}
-
 	/* Get fraction that are null */
 	if (HeapTupleIsValid(vardata.statsTuple))
 	{
@@ -3472,6 +3469,18 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 	else
 		stanullfrac = 0.0;
 
+	/*
+	 * If ndistinct isn't real, punt and return 0.1 per comments above, or
+	 * the null fraction instead if nulls are kept and it is larger.
+	 */
+	if (isdefault)
+	{
+		ReleaseVariableStats(vardata);
+		if (keep_nulls && stanullfrac > 0.1)
+			return (Selectivity) stanullfrac;
+		return (Selectivity) 0.1;
+	}
+
 	/* Compute avg freq of all distinct data values in raw relation */
 	avgfreq = (1.0 - stanullfrac) / ndistinct;
 
@@ -3486,6 +3495,10 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 	if (vardata.rel)
 		ndistinct *= vardata.rel->rows / vardata.rel->tuples;
 
+	/* treat nulls as a distinct value, if there are any and they matter */
+	if (keep_nulls && stanullfrac > 0.0)
+		ndistinct += 1.0;
+
 	/*
 	 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
 	 * number of buckets is less than the expected number of distinct values;
@@ -3499,7 +3512,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 	/*
 	 * Look up the frequency of the most common value, if available.
 	 */
-	mcvfreq = 0.0;
+	mcvfreq = keep_nulls ? stanullfrac : 0.0;
 
 	if (HeapTupleIsValid(vardata.statsTuple))
 	{
@@ -3511,9 +3524,10 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 							 &numbers, &nnumbers))
 		{
 			/*
-			 * The first MCV stat is for the most common value.
+			 * The first MCV stat is for the most common value.  Take it only
+			 * if it exceeds the null fraction established above (if any).
 			 */
-			if (nnumbers > 0)
+			if (nnumbers > 0 && numbers[0] > mcvfreq)
 				mcvfreq = numbers[0];
 			free_attstatsslot(vardata.atttype, NULL, 0,
 							  numbers, nnumbers);
@@ -3522,9 +3536,17 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 
 	/*
 	 * Adjust estimated bucketsize upward to account for skewed distribution.
+	 *
+	 * avgfreq can be 0.0 when the null fraction is 100%; in that case, if
+	 * nulls are being kept, just clamp estfract to 1.0.
 	 */
-	if (avgfreq > 0.0 && mcvfreq > avgfreq)
-		estfract *= mcvfreq / avgfreq;
+	if (mcvfreq > avgfreq)
+	{
+		if (avgfreq > 0.0)
+			estfract *= mcvfreq / avgfreq;
+		else if (keep_nulls)
+			estfract = 1.0;
+	}
 
 	/*
 	 * Clamp bucketsize to sane range (the above adjustment could easily
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index bf69f2a..0fa4788 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -188,7 +188,7 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 					double input_rows);
 
 extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey,
-						 double nbuckets);
+											double nbuckets, bool keep_nulls);
 
 extern Datum brincostestimate(PG_FUNCTION_ARGS);
 extern Datum btcostestimate(PG_FUNCTION_ARGS);
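
For review, here is a minimal standalone sketch (not planner code) of the
arithmetic the patched estimate_hash_bucketsize() performs once keep_nulls has
been derived in final_cost_hashjoin() (true for JOIN_RIGHT, JOIN_FULL, or a
non-strict hash operator).  The function name, its scalar arguments, and the
statistics in main() are made up for illustration; the isdefault punt path and
the scaling of ndistinct by rel->rows / rel->tuples are omitted for brevity.

#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical sketch of the patched bucket-size estimate, using the same
 * variable names as estimate_hash_bucketsize().  "mcv_first" stands in for
 * numbers[0], the frequency of the most common non-null value.
 */
static double
sketch_bucketsize(double ndistinct, double stanullfrac, double mcv_first,
				  double nbuckets, bool keep_nulls)
{
	double		avgfreq;
	double		estfract;
	double		mcvfreq;

	/* Average frequency of the non-null values in the raw relation. */
	avgfreq = (1.0 - stanullfrac) / ndistinct;

	/* All nulls hash alike, so count them as one extra value if kept. */
	if (keep_nulls && stanullfrac > 0.0)
		ndistinct += 1.0;

	/* Initial estimate: one bucket's worth, or one value's worth. */
	if (ndistinct > nbuckets)
		estfract = 1.0 / nbuckets;
	else
		estfract = 1.0 / ndistinct;

	/* The null fraction now competes with the MCV frequency. */
	mcvfreq = keep_nulls ? stanullfrac : 0.0;
	if (mcv_first > mcvfreq)
		mcvfreq = mcv_first;

	/* Inflate for skew; clamp when everything is null and nulls are kept. */
	if (mcvfreq > avgfreq)
	{
		if (avgfreq > 0.0)
			estfract *= mcvfreq / avgfreq;
		else if (keep_nulls)
			estfract = 1.0;
	}

	/* Clamp to a sane range, mirroring the existing code. */
	if (estfract < 1.0e-6)
		estfract = 1.0e-6;
	else if (estfract > 1.0)
		estfract = 1.0;

	return estfract;
}

int
main(void)
{
	/* Hypothetical inner key: 90% NULL, 1000 distinct non-null values. */
	printf("nulls discarded: %f\n",
		   sketch_bucketsize(1000.0, 0.9, 0.001, 1024.0, false));
	printf("nulls kept:      %f\n",
		   sketch_bucketsize(1000.0, 0.9, 0.001, 1024.0, true));
	return 0;
}

With those (made-up) statistics the estimate rises from 0.01 to the 1.0 clamp
once nulls have to be kept, i.e. the planner now expects essentially every
inner row to land in the NULL bucket, which the unpatched estimate would not
have accounted for.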
