From 7574945acced9af579e32f5e216483a1a50e0218 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@otacoo.com>
Date: Thu, 17 Jul 2014 21:58:25 +0900
Subject: [PATCH 2/2] Support for column hints

If incorrect column names are written in a query, system tries to
evaluate if there are columns on existing RTEs that are close in
distance to the one mistaken, and returns to user hints according
to the evaluation done.
---
 src/backend/parser/parse_expr.c           |   9 +-
 src/backend/parser/parse_func.c           |   2 +-
 src/backend/parser/parse_relation.c       | 318 ++++++++++++++++++++++++++----
 src/include/parser/parse_relation.h       |   3 +-
 src/test/regress/expected/alter_table.out |   8 +
 src/test/regress/expected/join.out        |  39 ++++
 src/test/regress/expected/plpgsql.out     |   1 +
 src/test/regress/expected/rowtypes.out    |   1 +
 src/test/regress/expected/rules.out       |   1 +
 src/test/regress/expected/without_oid.out |   1 +
 src/test/regress/sql/join.sql             |  24 +++
 11 files changed, 366 insertions(+), 41 deletions(-)

diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 4a8aaf6..9866198 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -621,7 +621,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field2);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL, NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -666,7 +667,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field3);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL, NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -724,7 +726,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field4);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL, NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 9ebd3fd..e128adf 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -1779,7 +1779,7 @@ ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg,
 									 ((Var *) first_arg)->varno,
 									 ((Var *) first_arg)->varlevelsup);
 		/* Return a Var if funcname matches a column, else NULL */
-		return scanRTEForColumn(pstate, rte, funcname, location);
+		return scanRTEForColumn(pstate, rte, funcname, location, NULL, NULL);
 	}
 
 	/*
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 478584d..2838f89 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
 
 #include "access/htup_details.h"
 #include "access/sysattr.h"
@@ -28,6 +29,7 @@
 #include "parser/parse_relation.h"
 #include "parser/parse_type.h"
 #include "utils/builtins.h"
+#include "utils/levenshtein.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
@@ -520,6 +522,22 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
 }
 
 /*
+ * distanceName
+ *	  Return Levenshtein distance between an actual column name and possible
+ *	  partial match.
+ */
+static int
+distanceName(const char *actual, const char *match, int max)
+{
+	int len = strlen(actual),
+		match_len = strlen(match);
+
+	/* Charge half as much per deletion as per insertion or per substitution */
+	return levenshtein_less_equal_internal(actual, len, match, match_len,
+								   2, 1, 2, max);
+}
+
+/*
  * scanRTEForColumn
  *	  Search the column names of a single RTE for the given name.
  *	  If found, return an appropriate Var node, else return NULL.
@@ -527,10 +545,24 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
  *
  * Side effect: if we find a match, mark the RTE as requiring read access
  * for the column.
+ *
+ * For those callers that will settle for a fuzzy match (for the purposes of
+ * building diagnostic messages), we match the column attribute whose name has
+ * the lowest Levenshtein distance from colname, setting *closest and
+ * *distance.  Such callers should not rely on the return value (even when
+ * there is an exact match), nor should they expect the usual side effect
+ * (unless there is an exact match).  This hardly matters in practice, since an
+ * error is imminent.
+ *
+ * If there are two or more attributes in the range table entry tied for
+ * closest, accurately report the shortest distance found overall, while not
+ * setting a "closest" attribute on the assumption that only a per-entry single
+ * closest match is useful.  Note that we never consider system column names
+ * when performing fuzzy matching.
  */
 Node *
 scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
-				 int location)
+				 int location, AttrNumber *closest, int *distance)
 {
 	Node	   *result = NULL;
 	int			attnum = 0;
@@ -548,12 +580,16 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 	 * Should this somehow go wrong and we try to access a dropped column,
 	 * we'll still catch it by virtue of the checks in
 	 * get_rte_attribute_type(), which is called by make_var().  That routine
-	 * has to do a cache lookup anyway, so the check there is cheap.
+	 * has to do a cache lookup anyway, so the check there is cheap.  Callers
+	 * interested in finding match with shortest distance need to defend
+	 * against this directly, though.
 	 */
 	foreach(c, rte->eref->colnames)
 	{
+		const char *attcolname = strVal(lfirst(c));
+
 		attnum++;
-		if (strcmp(strVal(lfirst(c)), colname) == 0)
+		if (strcmp(attcolname, colname) == 0)
 		{
 			if (result)
 				ereport(ERROR,
@@ -566,6 +602,39 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 			markVarForSelectPriv(pstate, var, rte);
 			result = (Node *) var;
 		}
+
+		if (distance && *distance != 0)
+		{
+			if (result)
+			{
+				/* Exact match just found */
+				*distance = 0;
+			}
+			else
+			{
+				int lowestdistance = *distance;
+				int thisdistance = distanceName(attcolname, colname,
+												lowestdistance);
+
+				if (thisdistance >= lowestdistance)
+				{
+					/*
+					 * This match distance may equal a prior match within this
+					 * same range table.  When that happens, the prior match is
+					 * discarded as worthless, since a single best match is
+					 * required within a RTE.
+					 */
+					if (thisdistance == lowestdistance)
+						*closest = InvalidAttrNumber;
+
+					continue;
+				}
+
+				/* Store new lowest observed distance for RT */
+				*distance = thisdistance;
+			}
+			*closest = attnum;
+		}
 	}
 
 	/*
@@ -642,7 +711,8 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 				continue;
 
 			/* use orig_pstate here to get the right sublevels_up */
-			newresult = scanRTEForColumn(orig_pstate, rte, colname, location);
+			newresult = scanRTEForColumn(orig_pstate, rte, colname, location,
+										 NULL, NULL);
 
 			if (newresult)
 			{
@@ -668,8 +738,14 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 
 /*
  * searchRangeTableForCol
- *	  See if any RangeTblEntry could possibly provide the given column name.
- *	  If so, return a pointer to the RangeTblEntry; else return NULL.
+ *	  See if any RangeTblEntry could possibly provide the given column name (or
+ *	  find the best match available).  Returns a list of equally likely
+ *	  candidates, or NIL in the event of no plausible candidate.
+ *
+ * Column name may be matched fuzzily; we provide the closet columns if there
+ * was not an exact match.  Caller can depend on passed closest array to find
+ * right attribute within corresponding (first and second) returned list RTEs.
+ * If closest attributes are InvalidAttrNumber, that indicates an exact match.
  *
  * This is different from colNameToVar in that it considers every entry in
  * the ParseState's rangetable(s), not only those that are currently visible
@@ -678,26 +754,145 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
  * matches, but only one will be returned).  This must be used ONLY as a
  * heuristic in giving suitable error messages.  See errorMissingColumn.
  */
-static RangeTblEntry *
-searchRangeTableForCol(ParseState *pstate, char *colname, int location)
+static List *
+searchRangeTableForCol(ParseState *pstate, const char *alias, char *colname,
+					   int location, AttrNumber closest[2])
 {
-	ParseState *orig_pstate = pstate;
+	ParseState	   *orig_pstate = pstate;
+	int				distance = INT_MAX;
+	List		   *matchedrte = NIL;
+	ListCell	   *l;
+	int				i;
 
 	while (pstate != NULL)
 	{
-		ListCell   *l;
-
 		foreach(l, pstate->p_rtable)
 		{
-			RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+			RangeTblEntry  *rte = (RangeTblEntry *) lfirst(l);
+			AttrNumber		rteclosest = InvalidAttrNumber;
+			int				rtdistance = INT_MAX;
+			bool			wrongalias;
 
-			if (scanRTEForColumn(orig_pstate, rte, colname, location))
-				return rte;
+			/*
+			 * Get single best match from each RTE, or no match for RTE if
+			 * there is a tie for best match within a given RTE
+			 */
+			scanRTEForColumn(orig_pstate, rte, colname, location, &rteclosest,
+							 &rtdistance);
+
+			/* Was alias provided by user that does not match entry's alias? */
+			wrongalias = (alias && strcmp(alias, rte->eref->aliasname) != 0);
+
+			if (rtdistance == 0)
+			{
+				/* Exact match (for "wrong alias" or "wrong level" cases) */
+				closest[0] = wrongalias? rteclosest : InvalidAttrNumber;
+
+				/*
+				 * Any exact match is always the uncontested best match.  It
+				 * doesn't seem worth considering the case where there are
+				 * multiple exact matches, so we're done.
+				 */
+				matchedrte = lappend(NIL, rte);
+				return matchedrte;
+			}
+
+			/*
+			 * Charge extra (for inexact matches only) when an alias was
+			 * specified that differs from what might have been used to
+			 * correctly qualify this RTE's closest column
+			 */
+			if (wrongalias)
+				rtdistance += 3;
+
+			if (rteclosest != InvalidAttrNumber)
+			{
+				if (rtdistance >= distance)
+				{
+					/*
+					 * Perhaps record this attribute as being just as close in
+					 * distance to closest attribute observed so far across
+					 * entire range table.  Iff this distance is ultimately the
+					 * lowest distance observed overall, it may end up as the
+					 * second match.
+					 */
+					if (rtdistance == distance)
+					{
+						closest[1] = rteclosest;
+						matchedrte = lappend(matchedrte, rte);
+					}
+
+					continue;
+				}
+
+				/*
+				 * One best match (better than any others in previous RTEs) was
+				 * found within this RTE
+				 */
+				distance = rtdistance;
+				/* New uncontested best match */
+				matchedrte = lappend(NIL, rte);
+				closest[0] = rteclosest;
+			}
+			else
+			{
+				/*
+				 * Even though there were perhaps multiple joint-best matches
+				 * within this RTE (implying that there can be no attribute
+				 * suggestion from it), the shortest distance should still
+				 * serve as the distance for later RTEs to beat (but naturally
+				 * only if it happens to be the lowest so far across the entire
+				 * range table).
+				 */
+				distance = Min(distance, rtdistance);
+			}
 		}
 
 		pstate = pstate->parentParseState;
 	}
-	return NULL;
+
+	/*
+	 * Too many equally close partial matches found?
+	 *
+	 * It's useful to provide two matches for the common case where two range
+	 * tables each have one equally distant candidate column, as when an
+	 * unqualified (and therefore would-be ambiguous) column name is specified
+	 * which is also misspelled by the user.  It seems unhelpful to show no
+	 * hint when this occurs, since in practice one attribute probably
+	 * references the other in a foreign key relationship.  However, when there
+	 * are more than 2 range tables with equally distant matches that's
+	 * probably because the matches are not useful, so don't suggest anything.
+	 */
+	if (list_length(matchedrte) > 2)
+		return NIL;
+
+	/*
+	 * Handle dropped columns, which can appear here as empty colnames per
+	 * remarks within scanRTEForColumn().  If either the first or second
+	 * suggested attributes are dropped, do not provide any suggestion.
+	 */
+	i = 0;
+	foreach(l, matchedrte)
+	{
+		RangeTblEntry  *rte = (RangeTblEntry *) lfirst(l);
+		char		   *closestcol;
+
+		closestcol = strVal(list_nth(rte->eref->colnames, closest[i++] - 1));
+
+		if (strcmp(closestcol, "") == 0)
+			return NIL;
+	}
+
+	/*
+	 * Distance must be less than a normalized threshold in order to avoid
+	 * completely ludicrous suggestions.  Note that a distance of 6 will be
+	 * seen when 6 deletions are required against actual attribute name, or 3
+	 * insertions/substitutions.
+	 */
+	if (distance > 6 && distance > strlen(colname) * 2 / 2)
+		return NIL;
+
+	return matchedrte;
 }
 
 /*
@@ -2855,41 +3050,92 @@ errorMissingRTE(ParseState *pstate, RangeVar *relation)
 /*
  * Generate a suitable error about a missing column.
  *
- * Since this is a very common type of error, we work rather hard to
- * produce a helpful message.
+ * Since this is a very common type of error, we work rather hard to produce a
+ * helpful message, going so far as to guess user's intent when a missing
+ * column name is probably intended to reference one of two would-be ambiguous
+ * attributes (when no alias/qualification was provided).
  */
 void
 errorMissingColumn(ParseState *pstate,
 				   char *relname, char *colname, int location)
 {
-	RangeTblEntry *rte;
+	List		   *matchedrte;
+	AttrNumber	    closest[2];
+	RangeTblEntry  *rte1 = NULL,
+				   *rte2 = NULL;
+	char		   *closestcol1;
+	char		   *closestcol2;
 
 	/*
-	 * If relname was given, just play dumb and report it.  (In practice, a
-	 * bad qualification name should end up at errorMissingRTE, not here, so
-	 * no need to work hard on this case.)
+	 * closest[0] will remain InvalidAttrNumber in event of exact match, and in
+	 * the event of an exact match there is only ever one suggestion
 	 */
-	if (relname)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_COLUMN),
-				 errmsg("column %s.%s does not exist", relname, colname),
-				 parser_errposition(pstate, location)));
+	closest[0] = closest[1] = InvalidAttrNumber;
 
 	/*
-	 * Otherwise, search the entire rtable looking for possible matches.  If
-	 * we find one, emit a hint about it.
+	 * Search the entire rtable looking for possible matches.  If we find one,
+	 * emit a hint about it.
 	 *
 	 * TODO: improve this code (and also errorMissingRTE) to mention using
 	 * LATERAL if appropriate.
 	 */
-	rte = searchRangeTableForCol(pstate, colname, location);
-
-	ereport(ERROR,
-			(errcode(ERRCODE_UNDEFINED_COLUMN),
-			 errmsg("column \"%s\" does not exist", colname),
-			 rte ? errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
-						   colname, rte->eref->aliasname) : 0,
-			 parser_errposition(pstate, location)));
+	matchedrte = searchRangeTableForCol(pstate, relname, colname, location,
+										closest);
+
+	/*
+	 * In practice a bad qualification name should end up at errorMissingRTE,
+	 * not here, so no need to work hard on this case.
+	 *
+	 * Extract RTEs for best match, if any, and joint best match, if any.
+	 */
+	if (matchedrte)
+	{
+		rte1 = (RangeTblEntry *) lfirst(list_head(matchedrte));
+
+		if (list_length(matchedrte) > 1)
+			rte2 = (RangeTblEntry *) lsecond(matchedrte);
+
+		if (rte1 && closest[0] != InvalidAttrNumber)
+			closestcol1 = strVal(list_nth(rte1->eref->colnames, closest[0] - 1));
+
+		if (rte2 && closest[1] != InvalidAttrNumber)
+			closestcol2 = strVal(list_nth(rte2->eref->colnames, closest[1] - 1));
+	}
+
+	if (!rte2)
+	{
+		/*
+		 * Handle case where there is zero or one column suggestions to hint,
+		 * including exact matches referenced but not visible.
+		 *
+		 * Infer an exact match referenced despite not being visible from the
+		 * fact that an attribute number was not passed back.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 rte1? closest[0] != InvalidAttrNumber?
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\".",
+						 rte1->eref->aliasname, closestcol1):
+				 errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
+						 colname, rte1->eref->aliasname): 0,
+				 parser_errposition(pstate, location)));
+	}
+	else
+	{
+		/* Handle case where there are two equally useful column hints */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\" or the column \"%s\".\"%s\".",
+						 rte1->eref->aliasname, closestcol1,
+						 rte2->eref->aliasname, closestcol2),
+				 parser_errposition(pstate, location)));
+	}
 }
 
 
diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h
index d8b9493..c18157a 100644
--- a/src/include/parser/parse_relation.h
+++ b/src/include/parser/parse_relation.h
@@ -35,7 +35,8 @@ extern RangeTblEntry *GetRTEByRangeTablePosn(ParseState *pstate,
 extern CommonTableExpr *GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte,
 			 int rtelevelsup);
 extern Node *scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte,
-				 char *colname, int location);
+				 char *colname, int location, AttrNumber *matchedatt,
+				 int *distance);
 extern Node *colNameToVar(ParseState *pstate, char *colname, bool localonly,
 			 int location);
 extern void markVarForSelectPriv(ParseState *pstate, Var *var,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 9b89e58..77829dc 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -536,6 +536,7 @@ create table atacc1 ( test int );
 -- add a check constraint (fails)
 alter table atacc1 add constraint atacc_test1 check (test1>3);
 ERROR:  column "test1" does not exist
+HINT:  Perhaps you meant to reference the column "atacc1"."test".
 drop table atacc1;
 -- something a little more complicated
 create table atacc1 ( test int, test2 int, test3 int);
@@ -1342,6 +1343,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
@@ -1355,6 +1357,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
@@ -1479,6 +1482,7 @@ select oid > 0, * from altstartwith; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altstartwith;
                ^
+HINT:  Perhaps you meant to reference the column "altstartwith"."col".
 select * from altstartwith;
  col 
 -----
@@ -1515,10 +1519,12 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altinhoid;
                ^
+HINT:  Perhaps you meant to reference the column "altinhoid"."col".
 select * from altwithoid;
  col 
 -----
@@ -1554,6 +1560,7 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid;
  ?column? | col 
 ----------+-----
@@ -1580,6 +1587,7 @@ select oid > 0, * from altwithoid; -- fails
 ERROR:  column "oid" does not exist
 LINE 1: select oid > 0, * from altwithoid;
                ^
+HINT:  Perhaps you meant to reference the column "altwithoid"."col".
 select oid > 0, * from altinhoid;
  ?column? | col 
 ----------+-----
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 1cb1c51..f4edcbe 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2222,6 +2222,12 @@ select * from t1 left join t2 on (t1.a = t2.a);
  200 | 1000 | 200 | 2001
 (5 rows)
 
+-- Test matching of column name with wrong alias
+select t1.x from t1 join t3 on (t1.a = t3.x);
+ERROR:  column t1.x does not exist
+LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x);
+               ^
+HINT:  Perhaps you meant to reference the column "t3"."x".
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -3388,6 +3394,39 @@ select * from
 (0 rows)
 
 --
+-- Test hints given on incorrect column references are useful
+--
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+ERROR:  column t1.uunique1 does not exist
+LINE 1: select t1.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1".
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+ERROR:  column t2.uunique1 does not exist
+LINE 1: select t2.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t2"."unique1".
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+ERROR:  column "uunique1" does not exist
+LINE 1: select uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1" or the column "t2"."unique1".
+--
+-- Take care to reference the correct RTE
+--
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+ERROR:  column atts.relid does not exist
+LINE 1: select atts.relid::regclass, s.* from pg_stats s join
+               ^
+HINT:  Perhaps you meant to reference the column "atts"."indexrelid".
+--
 -- Test LATERAL
 --
 select unique2, x.*
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 8892bb4..2cb4aa1 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -4771,6 +4771,7 @@ END$$;
 ERROR:  column "foo" does not exist
 LINE 1: SELECT rtrim(roomno) AS roomno, foo FROM Room ORDER BY roomn...
                                         ^
+HINT:  Perhaps you meant to reference the column "room"."roomno".
 QUERY:  SELECT rtrim(roomno) AS roomno, foo FROM Room ORDER BY roomno
 CONTEXT:  PL/pgSQL function inline_code_block line 4 at FOR over SELECT rows
 -- Check handling of errors thrown from/into anonymous code blocks.
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index 88e7bfa..19a6e98 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -452,6 +452,7 @@ select fullname.text from fullname;  -- error
 ERROR:  column fullname.text does not exist
 LINE 1: select fullname.text from fullname;
                ^
+HINT:  Perhaps you meant to reference the column "fullname"."last".
 -- same, but RECORD instead of named composite type:
 select cast (row('Jim', 'Beam') as text);
     row     
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index ca56b47..48c75fd 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2368,6 +2368,7 @@ select xmin, * from fooview;  -- fail, views don't have such a column
 ERROR:  column "xmin" does not exist
 LINE 1: select xmin, * from fooview;
                ^
+HINT:  Perhaps you meant to reference the column "fooview"."x".
 select reltoastrelid, relkind, relfrozenxid
   from pg_class where oid = 'fooview'::regclass;
  reltoastrelid | relkind | relfrozenxid 
diff --git a/src/test/regress/expected/without_oid.out b/src/test/regress/expected/without_oid.out
index cb2c0c0..fbff011 100644
--- a/src/test/regress/expected/without_oid.out
+++ b/src/test/regress/expected/without_oid.out
@@ -46,6 +46,7 @@ SELECT count(oid) FROM wo;
 ERROR:  column "oid" does not exist
 LINE 1: SELECT count(oid) FROM wo;
                      ^
+HINT:  Perhaps you meant to reference the column "wo"."i".
 VACUUM ANALYZE wi;
 VACUUM ANALYZE wo;
 SELECT min(relpages) < max(relpages), min(reltuples) - max(reltuples)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index fa3e068..4d60f9e 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -397,6 +397,10 @@ insert into t2a values (200, 2001);
 
 select * from t1 left join t2 on (t1.a = t2.a);
 
+-- Test matching of column name with wrong alias
+
+select t1.x from t1 join t3 on (t1.a = t3.x);
+
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -1047,6 +1051,26 @@ select * from
   int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
 
 --
+-- Test hints given on incorrect column references are useful
+--
+
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+
+--
+-- Take care to reference the correct RTE
+--
+
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+--
 -- Test LATERAL
 --
 
-- 
2.0.1

