[PATCH] Add hint for misspelled relations

Started by Steve Chavezabout 1 month ago4 messages

Steve Chavez

steve@supabase.io

about 1 month ago

2 attachment(s)

Hello hackers,

Currently misspelled columns offer a hint but not misspelled relations.

This patch enables that, the result is like:

-- having this table
create table clients (id int);
-- we misspell the table name
select * from cliants;
ERROR: relation "cliants" does not exist
LINE 1: select * from cliants;
HINT: Perhaps you meant to reference the table "public.clients".

The possible matches are searched in pg_class for the schemas present in
search_path (or if the relation is qualified, it only searches matches in
that schema). The logic reuses the `varstr_levenshtein_less_equal` function
similar to how the column matching is done.

If there's a tie in the fuzzy match, it's solved by preferring the schema
that appears first on the search path. If that fails, then the
lexicographic order is used to break the tie.

One problem is that scanning all pg_class entries can get expensive on big
catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES.
I've set this to 4096 arbitrarily, any guidance on what would be a good
number is appreciated. Personally I've seen a catalog that contains 125K
tables, with mostly auto generated names. For these cases I don't think the
hint helps that much anyway, so it seemed fine to bail here.

The changes are split into two commits, one refactoring some reusable
functions for easier review and another one implementing the relation hint.

Any feedback is welcomed.

Best regards,
Steve Chavez

[1]:

Attachments:

0001-refactor-isolate-relation-errors-to-a-function.patchtext/x-patch; charset=US-ASCII; name=0001-refactor-isolate-relation-errors-to-a-function.patchDownload

From 9a500e02535b09051816a55ebb6f75e4421da270 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:13:36 -0500
Subject: [PATCH 1/2] refactor: isolate relation errors to a function

Also add levenshtein_is_absurd explaining more about the formula used.
---
 src/backend/parser/parse_relation.c | 74 ++++++++++++++++-------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index d544a69fc80..c1563f1b51b 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -101,6 +101,8 @@ static int	specialAttNum(const char *attname);
 static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
 static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
 
+static inline bool levenshtein_is_absurd(int distance, int matchlen);
+static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
 
 /*
  * refnameNamespaceItem
@@ -615,11 +617,7 @@ updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
 									  - fuzzy_rte_penalty,
 									  true);
 
-	/*
-	 * If more than half the characters are different, don't treat it as a
-	 * match, to avoid making ridiculous suggestions.
-	 */
-	if (columndistance > matchlen / 2)
+	if (levenshtein_is_absurd(columndistance, matchlen))
 		return;
 
 	/*
@@ -1416,6 +1414,31 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
 	return nsitem;
 }
 
+void
+errorMissingRelation(ParseState *pstate, const RangeVar *relation)
+{
+	/*
+	 * An unqualified name might have been meant as a reference to
+	 * some not-yet-in-scope CTE.  The bare "does not exist" message
+	 * has proven remarkably unhelpful for figuring out such problems,
+	 * so we take pains to offer a specific hint.
+	 */
+	if (isFutureCTE(pstate, relation->relname))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("relation \"%s\" does not exist",
+						relation->relname),
+				 errdetail("There is a WITH item named \"%s\", but it cannot be referenced from this part of the query.",
+						   relation->relname),
+				 errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 relation->schemaname ?
+					errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
+					errmsg("relation \"%s\" does not exist", relation->relname)));
+}
+
 /*
  * Open a table during parse analysis
  *
@@ -1436,35 +1459,7 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
 	setup_parser_errposition_callback(&pcbstate, pstate, relation->location);
 	rel = table_openrv_extended(relation, lockmode, true);
 	if (rel == NULL)
-	{
-		if (relation->schemaname)
-			ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_TABLE),
-					 errmsg("relation \"%s.%s\" does not exist",
-							relation->schemaname, relation->relname)));
-		else
-		{
-			/*
-			 * An unqualified name might have been meant as a reference to
-			 * some not-yet-in-scope CTE.  The bare "does not exist" message
-			 * has proven remarkably unhelpful for figuring out such problems,
-			 * so we take pains to offer a specific hint.
-			 */
-			if (isFutureCTE(pstate, relation->relname))
-				ereport(ERROR,
-						(errcode(ERRCODE_UNDEFINED_TABLE),
-						 errmsg("relation \"%s\" does not exist",
-								relation->relname),
-						 errdetail("There is a WITH item named \"%s\", but it cannot be referenced from this part of the query.",
-								   relation->relname),
-						 errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
-			else
-				ereport(ERROR,
-						(errcode(ERRCODE_UNDEFINED_TABLE),
-						 errmsg("relation \"%s\" does not exist",
-								relation->relname)));
-		}
-	}
+		errorMissingRelation(pstate, relation);
 	cancel_parser_errposition_callback(&pcbstate);
 	return rel;
 }
@@ -3971,3 +3966,14 @@ getRTEPermissionInfo(List *rteperminfos, RangeTblEntry *rte)
 
 	return perminfo;
 }
+
+
+/*
+ * Ignore absurd suggestions (e.g., mostly different characters).
+ * Having MAX_FUZZY_DISTANCE = 3, `abc` and `xyz` have a levenshtein distance of 3 which falls under MAX_FUZZY_DISTANCE, but yet they're mostly different.
+ */
+static inline bool
+levenshtein_is_absurd(int distance, int matchlen)
+{
+	return distance > matchlen / 2;
+}
-- 
2.42.0

0002-add-a-hint-for-a-missing-relation.patchtext/x-patch; charset=US-ASCII; name=0002-add-a-hint-for-a-missing-relation.patchDownload

From 4423352524c14456bb451222b0d86c3dd5afbb02 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:58:46 -0500
Subject: [PATCH 2/2] add a hint for a missing relation

---
 src/backend/parser/parse_relation.c          | 219 ++++++++++++++++++-
 src/test/regress/expected/prepared_xacts.out |   2 +
 src/test/regress/expected/psql.out           |   1 +
 src/test/regress/expected/temp.out           |   4 +
 src/test/regress/expected/transactions.out   |   2 +
 src/test/regress/expected/xml_1.out          |   1 +
 6 files changed, 226 insertions(+), 3 deletions(-)

diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index c1563f1b51b..734c132aaff 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,10 +15,13 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
 
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/relation.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "catalog/heap.h"
 #include "catalog/namespace.h"
 #include "funcapi.h"
@@ -30,6 +33,7 @@
 #include "parser/parsetree.h"
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
+#include "utils/hsearch.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 #include "utils/varlena.h"
@@ -71,6 +75,24 @@ typedef struct
 
 #define MAX_FUZZY_DISTANCE				3
 
+#define MAX_REL_HINT_CANDIDATES			4096
+
+/*
+ * Support for fuzzily matching relations for error messages.
+ *
+ * If the relation is schema qualified, relations in that schema are fuzzyly searched.
+ * If the relation is not schema qualified, all the schemas in search_path are fuzzily searched.
+ *
+ * The searches are capped at MAX_REL_HINT_CANDIDATES, to prevent doing too much work
+ * in big catalogs.
+ */
+typedef struct
+{
+	int			distance;		/* Current distance */
+	int			search_path_pos;		/* Position of the current schema in search path */
+	char		*nspname;		/* Schema of current match */
+	char		*relname;		/* Relation name of current match */
+} FuzzyRelationMatchState;
 
 static ParseNamespaceItem *scanNameSpaceForRefname(ParseState *pstate,
 												   const char *refname,
@@ -102,7 +124,11 @@ static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
 static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
 
 static inline bool levenshtein_is_absurd(int distance, int matchlen);
-static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
+static void errorMissingRelation(ParseState *pstate,
+											 const RangeVar *relation);
+static List *relhintCandidateSchemas(const RangeVar *relation);
+static bool bestFuzzyRelationMatch(const RangeVar *relation,
+										  FuzzyRelationMatchState *match);
 
 /*
  * refnameNamespaceItem
@@ -1414,6 +1440,10 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
 	return nsitem;
 }
 
+/*
+ * Generate a suitable error about a missing relation, possibly with hints
+ * about similarly named relations.
+ */
 void
 errorMissingRelation(ParseState *pstate, const RangeVar *relation)
 {
@@ -1432,11 +1462,31 @@ errorMissingRelation(ParseState *pstate, const RangeVar *relation)
 						   relation->relname),
 				 errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
 	else
+	{
+		bool		found_match = false;
+		FuzzyRelationMatchState match = {
+			.distance = MAX_FUZZY_DISTANCE + 1,
+			.search_path_pos = INT_MAX,
+			.nspname = NULL,
+			.relname = NULL,
+		};
+
+		found_match = bestFuzzyRelationMatch(relation, &match);
+
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_TABLE),
 				 relation->schemaname ?
-					errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
-					errmsg("relation \"%s\" does not exist", relation->relname)));
+					errmsg("relation \"%s.%s\" does not exist",
+						   relation->schemaname, relation->relname) :
+					errmsg("relation \"%s\" does not exist",
+						   relation->relname),
+				 found_match ?
+					errhint("Perhaps you meant to reference the table \"%s\".",
+							quote_qualified_identifier(match.nspname,
+													   match.relname)) :
+					0,
+				 parser_errposition(pstate, relation->location)));
+	}
 }
 
 /*
@@ -1464,6 +1514,169 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
 	return rel;
 }
 
+/*
+ * Search pg_class for a relation name that's similar to the target.
+ */
+static bool
+bestFuzzyRelationMatch(const RangeVar *target,
+							  FuzzyRelationMatchState *fuzzystate)
+{
+	List	   *schemas = relhintCandidateSchemas(target);
+	ListCell   *lc;
+	Relation classRel;
+	TableScanDesc scan;
+	HeapTuple tuple;
+	int i = 0;
+	int checked = 0;
+
+	/* Schema order in the search_path */
+	typedef struct {
+	  Oid			oid;
+	  int			order;
+	} SchemaOrderEntry;
+	HTAB	   *schema_order;
+
+	if (schemas == NIL)
+		return false;
+
+	schema_order = hash_create("relation hint search_path order",
+									list_length(schemas),
+									&(HASHCTL){
+										.keysize = sizeof(Oid),
+ 										.entrysize = sizeof(SchemaOrderEntry)
+ 									},
+									HASH_ELEM | HASH_BLOBS);
+
+	/*
+	 * Populate schema_order.  This map won't really be needed in cases where
+	 * the candidate list has only one element, but keep it anyway to reuse
+	 * code.
+	 */
+	foreach(lc, schemas)
+	{
+		Oid			nsp = lfirst_oid(lc);
+		SchemaOrderEntry *entry;
+		bool		found;
+
+		entry = hash_search(schema_order, &nsp, HASH_ENTER, &found);
+		if (!found)
+			entry->order = i;
+		i++;
+	}
+
+	classRel = table_open(RelationRelationId, AccessShareLock);
+	scan = table_beginscan_catalog(classRel, 0, NULL);
+
+	/* Iterate over pg_class entries */
+	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+		const char *candidate = NameStr(classForm->relname);
+		int			candidatelen = strlen(candidate);
+		int			order = -1;
+		int			reldistance;
+		SchemaOrderEntry *entry;
+		bool		found;
+
+		/* Only search for relation-like objects */
+		if (!(classForm->relkind == RELKIND_RELATION ||
+			  classForm->relkind == RELKIND_PARTITIONED_TABLE ||
+			  classForm->relkind == RELKIND_FOREIGN_TABLE ||
+			  classForm->relkind == RELKIND_VIEW ||
+			  classForm->relkind == RELKIND_MATVIEW))
+			continue;
+
+		entry = hash_search(schema_order, &classForm->relnamespace,
+							HASH_FIND, &found);
+		if (!found)
+			continue;
+
+		order = entry->order;
+
+		/* Keep the scan bounded in very large catalogs. */
+		if (++checked > MAX_REL_HINT_CANDIDATES)
+			break;
+
+		/*
+		 * Only obtain new distances lower or equal than the capped current distance.
+		 * Equal distances are relevant because the tie is solved below.
+		 */
+		reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname),
+												 candidate, candidatelen,
+												 1, 1, 1,
+												 Min(fuzzystate->distance + 1, MAX_FUZZY_DISTANCE + 1),
+												 true);
+
+		/* The above can return MAX_FUZZY_DISTANCE + 1 results, skip these */
+		if (reldistance > MAX_FUZZY_DISTANCE)
+			continue;
+
+		if (levenshtein_is_absurd(reldistance, candidatelen))
+			continue;
+
+		/*
+		 * If the new distance is less than the match found so far, update
+		 * fuzzystate.  If the distance is equal, prefer the match with the
+		 * lowest search path position.  If the search position is equal, use
+		 * the lexicographic order to solve the tie, this also ensures the
+		 * hint is stable across runs since the heap scan order is
+		 * nondeterministic.
+		 */
+		if (reldistance < fuzzystate->distance ||
+			(reldistance == fuzzystate->distance &&
+			 order < fuzzystate->search_path_pos) ||
+			(reldistance == fuzzystate->distance &&
+			 order == fuzzystate->search_path_pos &&
+			 strcmp(candidate, fuzzystate->relname) < 0))
+		{
+			if (fuzzystate->relname)
+				pfree(fuzzystate->relname);
+			if (fuzzystate->nspname)
+				pfree(fuzzystate->nspname);
+
+			fuzzystate->distance        = reldistance;
+			fuzzystate->search_path_pos = order;
+			fuzzystate->relname         = pstrdup(candidate);
+			fuzzystate->nspname         = get_namespace_name(classForm->relnamespace);
+		}
+	}
+
+	table_endscan(scan);
+	table_close(classRel, AccessShareLock);
+	list_free(schemas);
+	hash_destroy(schema_order);
+
+	/* Return true if there was a match */
+	return fuzzystate->relname != NULL;
+}
+
+/*
+ * Candidate schemas for the relation hint.
+ *
+ * If the relation was schema-qualified we'll only search that schema
+ * (considering pg_temp), otherwise consider search_path.
+ */
+static List *
+relhintCandidateSchemas(const RangeVar *relation)
+{
+	if (relation->schemaname)
+	{
+		Oid			namespaceId = LookupNamespaceNoError(relation->schemaname);
+
+		return OidIsValid(namespaceId) ? list_make1_oid(namespaceId) : NIL;
+	}
+	else if (relation->relpersistence == RELPERSISTENCE_TEMP)
+	{
+		Oid			tempNamespace;
+
+		GetTempNamespaceState(&tempNamespace, NULL);
+		return OidIsValid(tempNamespace) ? list_make1_oid(tempNamespace) : NIL;
+	}
+	else
+		return fetch_search_path(true);
+}
+
+
 /*
  * Add an entry for a relation to the pstate's range table (p_rtable).
  * Then, construct and return a ParseNamespaceItem for the new RTE.
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
index 515a2ada9d1..47b1f6920fb 100644
--- a/src/test/regress/expected/prepared_xacts.out
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -201,6 +201,7 @@ SELECT * FROM pxtest2;
 ERROR:  relation "pxtest2" does not exist
 LINE 1: SELECT * FROM pxtest2;
                       ^
+HINT:  Perhaps you meant to reference the table "public.pxtest3".
 -- There should be two prepared transactions
 SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
      gid      
@@ -257,6 +258,7 @@ SELECT * FROM pxtest3;
 ERROR:  relation "pxtest3" does not exist
 LINE 1: SELECT * FROM pxtest3;
                       ^
+HINT:  Perhaps you meant to reference the table "public.pxtest2".
 -- There should be no prepared transactions
 SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
  gid 
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index c8f3932edf0..6b8f133aa7d 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5633,6 +5633,7 @@ SELECT * FROM ac_test;  -- should be gone now
 ERROR:  relation "ac_test" does not exist
 LINE 1: SELECT * FROM ac_test;
                       ^
+HINT:  Perhaps you meant to reference the table "public.aggtest".
 -- ON_ERROR_ROLLBACK
 \set ON_ERROR_ROLLBACK on
 CREATE TABLE oer_test (a int);
diff --git a/src/test/regress/expected/temp.out b/src/test/regress/expected/temp.out
index a50c7ae88a9..63e62028d56 100644
--- a/src/test/regress/expected/temp.out
+++ b/src/test/regress/expected/temp.out
@@ -47,6 +47,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- Test ON COMMIT DELETE ROWS
 CREATE TEMP TABLE temptest(col int) ON COMMIT DELETE ROWS;
 -- while we're here, verify successful truncation of index with SQL function
@@ -100,6 +101,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 BEGIN;
 CREATE TEMP TABLE temptest(col) ON COMMIT DROP AS SELECT 1;
 SELECT * FROM temptest;
@@ -113,6 +115,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- Test it with a CHECK condition that produces a toasted pg_constraint entry
 BEGIN;
 do $$
@@ -133,6 +136,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- ON COMMIT is only allowed for TEMP
 CREATE TABLE temptest(col int) ON COMMIT DELETE ROWS;
 ERROR:  ON COMMIT can only be used on temporary tables
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index 7f5757e89c4..2d89fb791c5 100644
--- a/src/test/regress/expected/transactions.out
+++ b/src/test/regress/expected/transactions.out
@@ -208,6 +208,7 @@ SELECT * FROM trans_bar;		-- shouldn't exist
 ERROR:  relation "trans_bar" does not exist
 LINE 1: SELECT * FROM trans_bar;
                       ^
+HINT:  Perhaps you meant to reference the table "public.trans_baz".
 SELECT * FROM trans_barbaz;	-- should be empty
  a 
 ---
@@ -226,6 +227,7 @@ BEGIN;
 ERROR:  relation "trans_bar" does not exist
 LINE 1: INSERT into trans_bar VALUES (1);
                     ^
+HINT:  Perhaps you meant to reference the table "public.trans_baz".
 	ROLLBACK TO one;
 	RELEASE SAVEPOINT one;
 	SAVEPOINT two;
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a3..0514b8dd92b 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1072,6 +1072,7 @@ SELECT * FROM xmltableview2;
 ERROR:  relation "xmltableview2" does not exist
 LINE 1: SELECT * FROM xmltableview2;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltableview1".
 \sv xmltableview2
 ERROR:  relation "xmltableview2" does not exist
 SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'),
-- 
2.42.0

Kirill Reshke

reshkekirill@gmail.com

about 1 month ago

In reply to: Steve Chavez (#1)

Re: [PATCH] Add hint for misspelled relations

On Tue, 2 Dec 2025 at 07:46, Steve Chavez <steve@supabase.io> wrote:

Hello hackers,

Currently misspelled columns offer a hint but not misspelled relations.

This patch enables that, the result is like:

-- having this table
create table clients (id int);
-- we misspell the table name
select * from cliants;
ERROR: relation "cliants" does not exist
LINE 1: select * from cliants;
HINT: Perhaps you meant to reference the table "public.clients".

The possible matches are searched in pg_class for the schemas present in search_path (or if the relation is qualified, it only searches matches in that schema). The logic reuses the `varstr_levenshtein_less_equal` function similar to how the column matching is done.

If there's a tie in the fuzzy match, it's solved by preferring the schema that appears first on the search path. If that fails, then the lexicographic order is used to break the tie.

One problem is that scanning all pg_class entries can get expensive on big catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on what would be a good number is appreciated. Personally I've seen a catalog that contains 125K tables, with mostly auto generated names. For these cases I don't think the hint helps that much anyway, so it seemed fine to bail here.

The changes are split into two commits, one refactoring some reusable functions for easier review and another one implementing the relation hint.

Any feedback is welcomed.

Best regards,
Steve Chavez

[1]:

Hi! I did not reviewed this patch closely, but I stopped a this:

+ reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname),
+ candidate, candidatelen,
+ 1, 1, 1,
+ Min(fuzzystate->distance + 1, MAX_FUZZY_DISTANCE + 1),
+ true);
+
+ /* The above can return MAX_FUZZY_DISTANCE + 1 results, skip these */
+ if (reldistance > MAX_FUZZY_DISTANCE)
+ continue;

Why do we even do this? Can't we just pass fuzzystate->distance to
varstr_levenshtein_less_equal? It is initialized in outer func to
MAX_FUZZY_DISTANCE + 1

--
Best regards,
Kirill Reshke

Daniel Gustafsson

daniel@yesql.se

about 1 month ago

In reply to: Steve Chavez (#1)

Re: [PATCH] Add hint for misspelled relations

On 2 Dec 2025, at 03:46, Steve Chavez <steve@supabase.io> wrote:

Currently misspelled columns offer a hint but not misspelled relations.

The tab-completion in psql is one tool offered to avoid misspellings which
reduce the need.

One problem is that scanning all pg_class entries can get expensive on big catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on what would be a good number is appreciated. Personally I've seen a catalog that contains 125K tables, with mostly auto generated names. For these cases I don't think the hint helps that much anyway, so it seemed fine to bail here.

What makes hints for columns appealing is that it's a pretty contained problem
across data we've already accumulated, relations are quite different as they
require a catalog lookup making it a lot less appealing. The number of
relations can easily become quite large, especially when large partitioning
hierarchies are involved, and while capping prevents large scans there is no
guarantee that the MAX_REL_HINT_CANDIDATES set contains the most likely
entries. Worst case is that it never contains the a close enough match and
we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.

Are there ways you can pare down the scan, perhaps a scankey on relispartition
and only consider base relations?

Hinting on relations can also give hints for relations the user does not have
permissions on which further reduce the useability.

--
Daniel Gustafsson

Steve Chavez

steve@supabase.io

about 1 month ago

In reply to: Daniel Gustafsson (#3)

1 attachment(s)

Re: [PATCH] Add hint for misspelled relations

Hi Kirill and Daniel, many thanks for your feedback.

Why do we even do this? Can't we just pass fuzzystate->distance to

varstr_levenshtein_less_equal? It is initialized in outer func to
MAX_FUZZY_DISTANCE + 1

This was an oversight. I've corrected it on the new attached patch.

Worst case is that it never contains the a close enough match and

we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.

Are there ways you can pare down the scan, perhaps a scankey on

relispartition
and only consider base relations?

I've improved the code (new patch attached) and the hash is no
longer necessary.
Now the scan omits partitions as you suggested plus they're now restricted
to each schema on the search_path.

It was also possible to do more scans for each relevant relkind
(RELKIND_RELATION, RELKIND_VIEW, etc) but I wasn't sure if that was more
expensive than doing the filtering on the loop as it's currently done.
It also made the code a bit more complicated with more nested loops, so
I've left it as is.

Perhaps we should limit the amount of schema scans too in case the
search_path has too many schemas?

Hinting on relations can also give hints for relations the user does not

have
permissions on which further reduce the useability.

I've tested the current column hint and it also doesn't consider privileges:

```
create role new nosuperuser;
grant usage on schema public to new;
create table subitems (id int, name text);
grant select (id) on subitems to new;

begin;
set local role to new;
select nam from subitems;
2025-12-09 18:16:38.017 -05 [542091] ERROR: column "nam" does not exist at
character 8
2025-12-09 18:16:38.017 -05 [542091] HINT: Perhaps you meant to reference
the column "subitems.name".
2025-12-09 18:16:38.017 -05 [542091] STATEMENT: select nam from subitems ;
ERROR: column "nam" does not exist
LINE 1: select nam from subitems ;
HINT: Perhaps you meant to reference the column "subitems.name".
```

So perhaps that can be a separate enhancement for both hints?

The tab-completion in psql is one tool offered to avoid misspellings which

reduce the need.

True, but it's still useful to offer a hint for other clients.

What do you think?

Best regards,
Steve Chavez

On Tue, 2 Dec 2025 at 04:27, Daniel Gustafsson <daniel@yesql.se> wrote:

Show quoted text

On 2 Dec 2025, at 03:46, Steve Chavez <steve@supabase.io> wrote:

Currently misspelled columns offer a hint but not misspelled relations.

The tab-completion in psql is one tool offered to avoid misspellings which
reduce the need.

One problem is that scanning all pg_class entries can get expensive on

big catalogs, so the number of searches is capped by
MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on
what would be a good number is appreciated. Personally I've seen a catalog
that contains 125K tables, with mostly auto generated names. For these
cases I don't think the hint helps that much anyway, so it seemed fine to
bail here.

What makes hints for columns appealing is that it's a pretty contained
problem
across data we've already accumulated, relations are quite different as
they
require a catalog lookup making it a lot less appealing. The number of
relations can easily become quite large, especially when large partitioning
hierarchies are involved, and while capping prevents large scans there is
no
guarantee that the MAX_REL_HINT_CANDIDATES set contains the most likely
entries. Worst case is that it never contains the a close enough match and
we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.

Are there ways you can pare down the scan, perhaps a scankey on
relispartition
and only consider base relations?

Hinting on relations can also give hints for relations the user does not
have
permissions on which further reduce the useability.

--
Daniel Gustafsson

Attachments:

v2-0001-add-a-hint-for-a-missing-relation.patchtext/x-patch; charset=US-ASCII; name=v2-0001-add-a-hint-for-a-missing-relation.patchDownload

From e9c6abc3793d676d65dacdaecd754d2245b83366 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:58:46 -0500
Subject: [PATCH v2] add a hint for a missing relation

* remove unneeded MAX_FUZZY_DISTANCE guard
* only scan base relations and not partitions
* constraint scanning to only relevant relnamespaces
---
 src/backend/parser/parse_relation.c          | 196 ++++++++++++++++++-
 src/test/regress/expected/prepared_xacts.out |   2 +
 src/test/regress/expected/psql.out           |   1 +
 src/test/regress/expected/temp.out           |   4 +
 src/test/regress/expected/transactions.out   |   2 +
 src/test/regress/expected/xml_1.out          |   1 +
 6 files changed, 203 insertions(+), 3 deletions(-)

diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index c1563f1b51b..3cb319ae444 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,11 +15,15 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
 
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/relation.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "catalog/heap.h"
+#include "catalog/pg_class.h"
 #include "catalog/namespace.h"
 #include "funcapi.h"
 #include "nodes/makefuncs.h"
@@ -30,6 +34,8 @@
 #include "parser/parsetree.h"
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 #include "utils/varlena.h"
@@ -71,6 +77,24 @@ typedef struct
 
 #define MAX_FUZZY_DISTANCE				3
 
+#define MAX_REL_HINT_CANDIDATES			4096
+
+/*
+ * Support for fuzzily matching relations for error messages.
+ *
+ * If the relation is schema qualified, relations in that schema are fuzzyly searched.
+ * If the relation is not schema qualified, all the schemas in search_path are fuzzily searched.
+ *
+ * The searches are capped at MAX_REL_HINT_CANDIDATES, to prevent doing too much work
+ * in big catalogs.
+ */
+typedef struct
+{
+	int			distance;		/* Current distance */
+	int			search_path_pos;		/* Position of the current schema in search path */
+	char		*nspname;		/* Schema of current match */
+	char		*relname;		/* Relation name of current match */
+} FuzzyRelationMatchState;
 
 static ParseNamespaceItem *scanNameSpaceForRefname(ParseState *pstate,
 												   const char *refname,
@@ -102,7 +126,11 @@ static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
 static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
 
 static inline bool levenshtein_is_absurd(int distance, int matchlen);
-static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
+static void errorMissingRelation(ParseState *pstate,
+											 const RangeVar *relation);
+static List *relhintCandidateSchemas(const RangeVar *relation);
+static bool bestFuzzyRelationMatch(const RangeVar *relation,
+										  FuzzyRelationMatchState *match);
 
 /*
  * refnameNamespaceItem
@@ -1414,6 +1442,10 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
 	return nsitem;
 }
 
+/*
+ * Generate a suitable error about a missing relation, possibly with hints
+ * about similarly named relations.
+ */
 void
 errorMissingRelation(ParseState *pstate, const RangeVar *relation)
 {
@@ -1432,11 +1464,31 @@ errorMissingRelation(ParseState *pstate, const RangeVar *relation)
 						   relation->relname),
 				 errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
 	else
+	{
+		bool		found_match = false;
+		FuzzyRelationMatchState match = {
+			.distance = MAX_FUZZY_DISTANCE,
+			.search_path_pos = INT_MAX,
+			.nspname = NULL,
+			.relname = NULL,
+		};
+
+		found_match = bestFuzzyRelationMatch(relation, &match);
+
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_TABLE),
 				 relation->schemaname ?
-					errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
-					errmsg("relation \"%s\" does not exist", relation->relname)));
+					errmsg("relation \"%s.%s\" does not exist",
+						   relation->schemaname, relation->relname) :
+					errmsg("relation \"%s\" does not exist",
+						   relation->relname),
+				 found_match ?
+					errhint("Perhaps you meant to reference the table \"%s\".",
+							quote_qualified_identifier(match.nspname,
+													   match.relname)) :
+					0,
+				 parser_errposition(pstate, relation->location)));
+	}
 }
 
 /*
@@ -1464,6 +1516,144 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
 	return rel;
 }
 
+/*
+ * Search pg_class for a relation name that's similar to the target.
+ */
+static bool
+bestFuzzyRelationMatch(const RangeVar *target,
+				  FuzzyRelationMatchState *fuzzystate)
+{
+	List   *candidate_schemas = relhintCandidateSchemas(target);
+	Relation	classRel;
+	int		checked = 0;
+	int		schema_pos = 0;
+
+	if (candidate_schemas == NIL)
+		return false;
+
+	classRel = table_open(RelationRelationId, AccessShareLock);
+
+	foreach_oid(schema, candidate_schemas)
+	{
+		ScanKeyData skey[2];
+		TableScanDesc scan;
+		HeapTuple	tuple;
+
+		/* Scan objects on each schema */
+		ScanKeyInit(&skey[0],
+					Anum_pg_class_relnamespace,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(schema));
+
+    /* Don't scan partitions */
+		ScanKeyInit(&skey[1],
+					Anum_pg_class_relispartition,
+					BTEqualStrategyNumber, F_BOOLEQ,
+					BoolGetDatum(false));
+
+		scan = table_beginscan_catalog(classRel, 2, skey);
+
+		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+		{
+			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+			const char *candidate = NameStr(classForm->relname);
+			int		candidatelen = strlen(candidate);
+			int		reldistance;
+
+			/* Only search for relation-like objects */
+			if (!(classForm->relkind == RELKIND_RELATION ||
+				  classForm->relkind == RELKIND_PARTITIONED_TABLE ||
+				  classForm->relkind == RELKIND_FOREIGN_TABLE ||
+				  classForm->relkind == RELKIND_VIEW ||
+				  classForm->relkind == RELKIND_MATVIEW))
+				continue;
+
+			/* Keep the scan bounded in very large catalogs. */
+			if (++checked > MAX_REL_HINT_CANDIDATES)
+			{
+				table_endscan(scan);
+				goto done;
+			}
+
+			/*
+			 * Only obtain new distances lower or equal than the capped current distance.
+			 * Equal distances are relevant because the tie is solved below.
+			 */
+			reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname),
+							candidate, candidatelen,
+							1, 1, 1,
+							fuzzystate->distance,
+							true);
+
+			if (levenshtein_is_absurd(reldistance, candidatelen))
+				continue;
+
+			/*
+			 * If the new distance is less than the match found so far, update
+			 * fuzzystate.  If the distance is equal, prefer the match with the
+			 * lowest search path position.  If the search position is equal, use
+			 * the lexicographic order to solve the tie, this also ensures the
+			 * hint is stable across runs since the heap scan order is
+			 * nondeterministic.
+			 */
+			if (reldistance < fuzzystate->distance ||
+				(reldistance == fuzzystate->distance &&
+				 schema_pos < fuzzystate->search_path_pos) ||
+				(reldistance == fuzzystate->distance &&
+				 schema_pos == fuzzystate->search_path_pos &&
+				 strcmp(candidate, fuzzystate->relname) < 0))
+			{
+				if (fuzzystate->relname)
+					pfree(fuzzystate->relname);
+				if (fuzzystate->nspname)
+					pfree(fuzzystate->nspname);
+
+				fuzzystate->distance = reldistance;
+				fuzzystate->search_path_pos = schema_pos;
+				fuzzystate->relname = pstrdup(candidate);
+				fuzzystate->nspname = get_namespace_name(classForm->relnamespace);
+			}
+		}
+
+		table_endscan(scan);
+		schema_pos++;
+	}
+
+done:
+	table_close(classRel, AccessShareLock);
+	list_free(candidate_schemas);
+
+	/* Return true if there was a match */
+	return fuzzystate->relname != NULL;
+}
+
+/*
+ * Candidate schemas for the relation hint.
+ *
+ * If the relation was schema-qualified we'll only search that schema
+ * (considering pg_temp), otherwise consider search_path.
+ */
+static List *
+relhintCandidateSchemas(const RangeVar *relation)
+{
+	if (relation->schemaname)
+	{
+		Oid			namespaceId = LookupNamespaceNoError(relation->schemaname);
+
+		return OidIsValid(namespaceId) ? list_make1_oid(namespaceId) : NIL;
+	}
+	else if (relation->relpersistence == RELPERSISTENCE_TEMP)
+	{
+		Oid			tempNamespace;
+
+		GetTempNamespaceState(&tempNamespace, NULL);
+		return OidIsValid(tempNamespace) ? list_make1_oid(tempNamespace) : NIL;
+	}
+	else
+		return fetch_search_path(true);
+}
+
+
 /*
  * Add an entry for a relation to the pstate's range table (p_rtable).
  * Then, construct and return a ParseNamespaceItem for the new RTE.
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
index 515a2ada9d1..47b1f6920fb 100644
--- a/src/test/regress/expected/prepared_xacts.out
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -201,6 +201,7 @@ SELECT * FROM pxtest2;
 ERROR:  relation "pxtest2" does not exist
 LINE 1: SELECT * FROM pxtest2;
                       ^
+HINT:  Perhaps you meant to reference the table "public.pxtest3".
 -- There should be two prepared transactions
 SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
      gid      
@@ -257,6 +258,7 @@ SELECT * FROM pxtest3;
 ERROR:  relation "pxtest3" does not exist
 LINE 1: SELECT * FROM pxtest3;
                       ^
+HINT:  Perhaps you meant to reference the table "public.pxtest2".
 -- There should be no prepared transactions
 SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
  gid 
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index c8f3932edf0..6b8f133aa7d 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5633,6 +5633,7 @@ SELECT * FROM ac_test;  -- should be gone now
 ERROR:  relation "ac_test" does not exist
 LINE 1: SELECT * FROM ac_test;
                       ^
+HINT:  Perhaps you meant to reference the table "public.aggtest".
 -- ON_ERROR_ROLLBACK
 \set ON_ERROR_ROLLBACK on
 CREATE TABLE oer_test (a int);
diff --git a/src/test/regress/expected/temp.out b/src/test/regress/expected/temp.out
index a50c7ae88a9..63e62028d56 100644
--- a/src/test/regress/expected/temp.out
+++ b/src/test/regress/expected/temp.out
@@ -47,6 +47,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- Test ON COMMIT DELETE ROWS
 CREATE TEMP TABLE temptest(col int) ON COMMIT DELETE ROWS;
 -- while we're here, verify successful truncation of index with SQL function
@@ -100,6 +101,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 BEGIN;
 CREATE TEMP TABLE temptest(col) ON COMMIT DROP AS SELECT 1;
 SELECT * FROM temptest;
@@ -113,6 +115,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- Test it with a CHECK condition that produces a toasted pg_constraint entry
 BEGIN;
 do $$
@@ -133,6 +136,7 @@ SELECT * FROM temptest;
 ERROR:  relation "temptest" does not exist
 LINE 1: SELECT * FROM temptest;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltest".
 -- ON COMMIT is only allowed for TEMP
 CREATE TABLE temptest(col int) ON COMMIT DELETE ROWS;
 ERROR:  ON COMMIT can only be used on temporary tables
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index 7f5757e89c4..2d89fb791c5 100644
--- a/src/test/regress/expected/transactions.out
+++ b/src/test/regress/expected/transactions.out
@@ -208,6 +208,7 @@ SELECT * FROM trans_bar;		-- shouldn't exist
 ERROR:  relation "trans_bar" does not exist
 LINE 1: SELECT * FROM trans_bar;
                       ^
+HINT:  Perhaps you meant to reference the table "public.trans_baz".
 SELECT * FROM trans_barbaz;	-- should be empty
  a 
 ---
@@ -226,6 +227,7 @@ BEGIN;
 ERROR:  relation "trans_bar" does not exist
 LINE 1: INSERT into trans_bar VALUES (1);
                     ^
+HINT:  Perhaps you meant to reference the table "public.trans_baz".
 	ROLLBACK TO one;
 	RELEASE SAVEPOINT one;
 	SAVEPOINT two;
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a3..0514b8dd92b 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1072,6 +1072,7 @@ SELECT * FROM xmltableview2;
 ERROR:  relation "xmltableview2" does not exist
 LINE 1: SELECT * FROM xmltableview2;
                       ^
+HINT:  Perhaps you meant to reference the table "public.xmltableview1".
 \sv xmltableview2
 ERROR:  relation "xmltableview2" does not exist
 SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'),
-- 
2.42.0