[PATCH] Add hint for misspelled relations
Hello hackers,
Currently misspelled columns offer a hint but not misspelled relations.
This patch enables that, the result is like:
-- having this table
create table clients (id int);
-- we misspell the table name
select * from cliants;
ERROR: relation "cliants" does not exist
LINE 1: select * from cliants;
HINT: Perhaps you meant to reference the table "public.clients".
The possible matches are searched in pg_class for the schemas present in
search_path (or if the relation is qualified, it only searches matches in
that schema). The logic reuses the `varstr_levenshtein_less_equal` function
similar to how the column matching is done.
If there's a tie in the fuzzy match, it's solved by preferring the schema
that appears first on the search path. If that fails, then the
lexicographic order is used to break the tie.
One problem is that scanning all pg_class entries can get expensive on big
catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES.
I've set this to 4096 arbitrarily, any guidance on what would be a good
number is appreciated. Personally I've seen a catalog that contains 125K
tables, with mostly auto generated names. For these cases I don't think the
hint helps that much anyway, so it seemed fine to bail here.
The changes are split into two commits, one refactoring some reusable
functions for easier review and another one implementing the relation hint.
Any feedback is welcomed.
Best regards,
Steve Chavez
[1]:
Attachments:
0001-refactor-isolate-relation-errors-to-a-function.patchtext/x-patch; charset=US-ASCII; name=0001-refactor-isolate-relation-errors-to-a-function.patchDownload
From 9a500e02535b09051816a55ebb6f75e4421da270 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:13:36 -0500
Subject: [PATCH 1/2] refactor: isolate relation errors to a function
Also add levenshtein_is_absurd explaining more about the formula used.
---
src/backend/parser/parse_relation.c | 74 ++++++++++++++++-------------
1 file changed, 40 insertions(+), 34 deletions(-)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index d544a69fc80..c1563f1b51b 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -101,6 +101,8 @@ static int specialAttNum(const char *attname);
static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
+static inline bool levenshtein_is_absurd(int distance, int matchlen);
+static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
/*
* refnameNamespaceItem
@@ -615,11 +617,7 @@ updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
- fuzzy_rte_penalty,
true);
- /*
- * If more than half the characters are different, don't treat it as a
- * match, to avoid making ridiculous suggestions.
- */
- if (columndistance > matchlen / 2)
+ if (levenshtein_is_absurd(columndistance, matchlen))
return;
/*
@@ -1416,6 +1414,31 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
return nsitem;
}
+void
+errorMissingRelation(ParseState *pstate, const RangeVar *relation)
+{
+ /*
+ * An unqualified name might have been meant as a reference to
+ * some not-yet-in-scope CTE. The bare "does not exist" message
+ * has proven remarkably unhelpful for figuring out such problems,
+ * so we take pains to offer a specific hint.
+ */
+ if (isFutureCTE(pstate, relation->relname))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" does not exist",
+ relation->relname),
+ errdetail("There is a WITH item named \"%s\", but it cannot be referenced from this part of the query.",
+ relation->relname),
+ errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ relation->schemaname ?
+ errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
+ errmsg("relation \"%s\" does not exist", relation->relname)));
+}
+
/*
* Open a table during parse analysis
*
@@ -1436,35 +1459,7 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
setup_parser_errposition_callback(&pcbstate, pstate, relation->location);
rel = table_openrv_extended(relation, lockmode, true);
if (rel == NULL)
- {
- if (relation->schemaname)
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("relation \"%s.%s\" does not exist",
- relation->schemaname, relation->relname)));
- else
- {
- /*
- * An unqualified name might have been meant as a reference to
- * some not-yet-in-scope CTE. The bare "does not exist" message
- * has proven remarkably unhelpful for figuring out such problems,
- * so we take pains to offer a specific hint.
- */
- if (isFutureCTE(pstate, relation->relname))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("relation \"%s\" does not exist",
- relation->relname),
- errdetail("There is a WITH item named \"%s\", but it cannot be referenced from this part of the query.",
- relation->relname),
- errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("relation \"%s\" does not exist",
- relation->relname)));
- }
- }
+ errorMissingRelation(pstate, relation);
cancel_parser_errposition_callback(&pcbstate);
return rel;
}
@@ -3971,3 +3966,14 @@ getRTEPermissionInfo(List *rteperminfos, RangeTblEntry *rte)
return perminfo;
}
+
+
+/*
+ * Ignore absurd suggestions (e.g., mostly different characters).
+ * Having MAX_FUZZY_DISTANCE = 3, `abc` and `xyz` have a levenshtein distance of 3 which falls under MAX_FUZZY_DISTANCE, but yet they're mostly different.
+ */
+static inline bool
+levenshtein_is_absurd(int distance, int matchlen)
+{
+ return distance > matchlen / 2;
+}
--
2.42.0
0002-add-a-hint-for-a-missing-relation.patchtext/x-patch; charset=US-ASCII; name=0002-add-a-hint-for-a-missing-relation.patchDownload
From 4423352524c14456bb451222b0d86c3dd5afbb02 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:58:46 -0500
Subject: [PATCH 2/2] add a hint for a missing relation
---
src/backend/parser/parse_relation.c | 219 ++++++++++++++++++-
src/test/regress/expected/prepared_xacts.out | 2 +
src/test/regress/expected/psql.out | 1 +
src/test/regress/expected/temp.out | 4 +
src/test/regress/expected/transactions.out | 2 +
src/test/regress/expected/xml_1.out | 1 +
6 files changed, 226 insertions(+), 3 deletions(-)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index c1563f1b51b..734c132aaff 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,10 +15,13 @@
#include "postgres.h"
#include <ctype.h>
+#include <limits.h>
+#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "funcapi.h"
@@ -30,6 +33,7 @@
#include "parser/parsetree.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
+#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
@@ -71,6 +75,24 @@ typedef struct
#define MAX_FUZZY_DISTANCE 3
+#define MAX_REL_HINT_CANDIDATES 4096
+
+/*
+ * Support for fuzzily matching relations for error messages.
+ *
+ * If the relation is schema qualified, relations in that schema are fuzzyly searched.
+ * If the relation is not schema qualified, all the schemas in search_path are fuzzily searched.
+ *
+ * The searches are capped at MAX_REL_HINT_CANDIDATES, to prevent doing too much work
+ * in big catalogs.
+ */
+typedef struct
+{
+ int distance; /* Current distance */
+ int search_path_pos; /* Position of the current schema in search path */
+ char *nspname; /* Schema of current match */
+ char *relname; /* Relation name of current match */
+} FuzzyRelationMatchState;
static ParseNamespaceItem *scanNameSpaceForRefname(ParseState *pstate,
const char *refname,
@@ -102,7 +124,11 @@ static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
static inline bool levenshtein_is_absurd(int distance, int matchlen);
-static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
+static void errorMissingRelation(ParseState *pstate,
+ const RangeVar *relation);
+static List *relhintCandidateSchemas(const RangeVar *relation);
+static bool bestFuzzyRelationMatch(const RangeVar *relation,
+ FuzzyRelationMatchState *match);
/*
* refnameNamespaceItem
@@ -1414,6 +1440,10 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
return nsitem;
}
+/*
+ * Generate a suitable error about a missing relation, possibly with hints
+ * about similarly named relations.
+ */
void
errorMissingRelation(ParseState *pstate, const RangeVar *relation)
{
@@ -1432,11 +1462,31 @@ errorMissingRelation(ParseState *pstate, const RangeVar *relation)
relation->relname),
errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
else
+ {
+ bool found_match = false;
+ FuzzyRelationMatchState match = {
+ .distance = MAX_FUZZY_DISTANCE + 1,
+ .search_path_pos = INT_MAX,
+ .nspname = NULL,
+ .relname = NULL,
+ };
+
+ found_match = bestFuzzyRelationMatch(relation, &match);
+
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE),
relation->schemaname ?
- errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
- errmsg("relation \"%s\" does not exist", relation->relname)));
+ errmsg("relation \"%s.%s\" does not exist",
+ relation->schemaname, relation->relname) :
+ errmsg("relation \"%s\" does not exist",
+ relation->relname),
+ found_match ?
+ errhint("Perhaps you meant to reference the table \"%s\".",
+ quote_qualified_identifier(match.nspname,
+ match.relname)) :
+ 0,
+ parser_errposition(pstate, relation->location)));
+ }
}
/*
@@ -1464,6 +1514,169 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
return rel;
}
+/*
+ * Search pg_class for a relation name that's similar to the target.
+ */
+static bool
+bestFuzzyRelationMatch(const RangeVar *target,
+ FuzzyRelationMatchState *fuzzystate)
+{
+ List *schemas = relhintCandidateSchemas(target);
+ ListCell *lc;
+ Relation classRel;
+ TableScanDesc scan;
+ HeapTuple tuple;
+ int i = 0;
+ int checked = 0;
+
+ /* Schema order in the search_path */
+ typedef struct {
+ Oid oid;
+ int order;
+ } SchemaOrderEntry;
+ HTAB *schema_order;
+
+ if (schemas == NIL)
+ return false;
+
+ schema_order = hash_create("relation hint search_path order",
+ list_length(schemas),
+ &(HASHCTL){
+ .keysize = sizeof(Oid),
+ .entrysize = sizeof(SchemaOrderEntry)
+ },
+ HASH_ELEM | HASH_BLOBS);
+
+ /*
+ * Populate schema_order. This map won't really be needed in cases where
+ * the candidate list has only one element, but keep it anyway to reuse
+ * code.
+ */
+ foreach(lc, schemas)
+ {
+ Oid nsp = lfirst_oid(lc);
+ SchemaOrderEntry *entry;
+ bool found;
+
+ entry = hash_search(schema_order, &nsp, HASH_ENTER, &found);
+ if (!found)
+ entry->order = i;
+ i++;
+ }
+
+ classRel = table_open(RelationRelationId, AccessShareLock);
+ scan = table_beginscan_catalog(classRel, 0, NULL);
+
+ /* Iterate over pg_class entries */
+ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+ const char *candidate = NameStr(classForm->relname);
+ int candidatelen = strlen(candidate);
+ int order = -1;
+ int reldistance;
+ SchemaOrderEntry *entry;
+ bool found;
+
+ /* Only search for relation-like objects */
+ if (!(classForm->relkind == RELKIND_RELATION ||
+ classForm->relkind == RELKIND_PARTITIONED_TABLE ||
+ classForm->relkind == RELKIND_FOREIGN_TABLE ||
+ classForm->relkind == RELKIND_VIEW ||
+ classForm->relkind == RELKIND_MATVIEW))
+ continue;
+
+ entry = hash_search(schema_order, &classForm->relnamespace,
+ HASH_FIND, &found);
+ if (!found)
+ continue;
+
+ order = entry->order;
+
+ /* Keep the scan bounded in very large catalogs. */
+ if (++checked > MAX_REL_HINT_CANDIDATES)
+ break;
+
+ /*
+ * Only obtain new distances lower or equal than the capped current distance.
+ * Equal distances are relevant because the tie is solved below.
+ */
+ reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname),
+ candidate, candidatelen,
+ 1, 1, 1,
+ Min(fuzzystate->distance + 1, MAX_FUZZY_DISTANCE + 1),
+ true);
+
+ /* The above can return MAX_FUZZY_DISTANCE + 1 results, skip these */
+ if (reldistance > MAX_FUZZY_DISTANCE)
+ continue;
+
+ if (levenshtein_is_absurd(reldistance, candidatelen))
+ continue;
+
+ /*
+ * If the new distance is less than the match found so far, update
+ * fuzzystate. If the distance is equal, prefer the match with the
+ * lowest search path position. If the search position is equal, use
+ * the lexicographic order to solve the tie, this also ensures the
+ * hint is stable across runs since the heap scan order is
+ * nondeterministic.
+ */
+ if (reldistance < fuzzystate->distance ||
+ (reldistance == fuzzystate->distance &&
+ order < fuzzystate->search_path_pos) ||
+ (reldistance == fuzzystate->distance &&
+ order == fuzzystate->search_path_pos &&
+ strcmp(candidate, fuzzystate->relname) < 0))
+ {
+ if (fuzzystate->relname)
+ pfree(fuzzystate->relname);
+ if (fuzzystate->nspname)
+ pfree(fuzzystate->nspname);
+
+ fuzzystate->distance = reldistance;
+ fuzzystate->search_path_pos = order;
+ fuzzystate->relname = pstrdup(candidate);
+ fuzzystate->nspname = get_namespace_name(classForm->relnamespace);
+ }
+ }
+
+ table_endscan(scan);
+ table_close(classRel, AccessShareLock);
+ list_free(schemas);
+ hash_destroy(schema_order);
+
+ /* Return true if there was a match */
+ return fuzzystate->relname != NULL;
+}
+
+/*
+ * Candidate schemas for the relation hint.
+ *
+ * If the relation was schema-qualified we'll only search that schema
+ * (considering pg_temp), otherwise consider search_path.
+ */
+static List *
+relhintCandidateSchemas(const RangeVar *relation)
+{
+ if (relation->schemaname)
+ {
+ Oid namespaceId = LookupNamespaceNoError(relation->schemaname);
+
+ return OidIsValid(namespaceId) ? list_make1_oid(namespaceId) : NIL;
+ }
+ else if (relation->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ Oid tempNamespace;
+
+ GetTempNamespaceState(&tempNamespace, NULL);
+ return OidIsValid(tempNamespace) ? list_make1_oid(tempNamespace) : NIL;
+ }
+ else
+ return fetch_search_path(true);
+}
+
+
/*
* Add an entry for a relation to the pstate's range table (p_rtable).
* Then, construct and return a ParseNamespaceItem for the new RTE.
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
index 515a2ada9d1..47b1f6920fb 100644
--- a/src/test/regress/expected/prepared_xacts.out
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -201,6 +201,7 @@ SELECT * FROM pxtest2;
ERROR: relation "pxtest2" does not exist
LINE 1: SELECT * FROM pxtest2;
^
+HINT: Perhaps you meant to reference the table "public.pxtest3".
-- There should be two prepared transactions
SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
gid
@@ -257,6 +258,7 @@ SELECT * FROM pxtest3;
ERROR: relation "pxtest3" does not exist
LINE 1: SELECT * FROM pxtest3;
^
+HINT: Perhaps you meant to reference the table "public.pxtest2".
-- There should be no prepared transactions
SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
gid
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index c8f3932edf0..6b8f133aa7d 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5633,6 +5633,7 @@ SELECT * FROM ac_test; -- should be gone now
ERROR: relation "ac_test" does not exist
LINE 1: SELECT * FROM ac_test;
^
+HINT: Perhaps you meant to reference the table "public.aggtest".
-- ON_ERROR_ROLLBACK
\set ON_ERROR_ROLLBACK on
CREATE TABLE oer_test (a int);
diff --git a/src/test/regress/expected/temp.out b/src/test/regress/expected/temp.out
index a50c7ae88a9..63e62028d56 100644
--- a/src/test/regress/expected/temp.out
+++ b/src/test/regress/expected/temp.out
@@ -47,6 +47,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- Test ON COMMIT DELETE ROWS
CREATE TEMP TABLE temptest(col int) ON COMMIT DELETE ROWS;
-- while we're here, verify successful truncation of index with SQL function
@@ -100,6 +101,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
BEGIN;
CREATE TEMP TABLE temptest(col) ON COMMIT DROP AS SELECT 1;
SELECT * FROM temptest;
@@ -113,6 +115,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- Test it with a CHECK condition that produces a toasted pg_constraint entry
BEGIN;
do $$
@@ -133,6 +136,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- ON COMMIT is only allowed for TEMP
CREATE TABLE temptest(col int) ON COMMIT DELETE ROWS;
ERROR: ON COMMIT can only be used on temporary tables
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index 7f5757e89c4..2d89fb791c5 100644
--- a/src/test/regress/expected/transactions.out
+++ b/src/test/regress/expected/transactions.out
@@ -208,6 +208,7 @@ SELECT * FROM trans_bar; -- shouldn't exist
ERROR: relation "trans_bar" does not exist
LINE 1: SELECT * FROM trans_bar;
^
+HINT: Perhaps you meant to reference the table "public.trans_baz".
SELECT * FROM trans_barbaz; -- should be empty
a
---
@@ -226,6 +227,7 @@ BEGIN;
ERROR: relation "trans_bar" does not exist
LINE 1: INSERT into trans_bar VALUES (1);
^
+HINT: Perhaps you meant to reference the table "public.trans_baz".
ROLLBACK TO one;
RELEASE SAVEPOINT one;
SAVEPOINT two;
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a3..0514b8dd92b 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1072,6 +1072,7 @@ SELECT * FROM xmltableview2;
ERROR: relation "xmltableview2" does not exist
LINE 1: SELECT * FROM xmltableview2;
^
+HINT: Perhaps you meant to reference the table "public.xmltableview1".
\sv xmltableview2
ERROR: relation "xmltableview2" does not exist
SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'),
--
2.42.0
On Tue, 2 Dec 2025 at 07:46, Steve Chavez <steve@supabase.io> wrote:
Hello hackers,
Currently misspelled columns offer a hint but not misspelled relations.
This patch enables that, the result is like:
-- having this table
create table clients (id int);
-- we misspell the table name
select * from cliants;
ERROR: relation "cliants" does not exist
LINE 1: select * from cliants;
HINT: Perhaps you meant to reference the table "public.clients".The possible matches are searched in pg_class for the schemas present in search_path (or if the relation is qualified, it only searches matches in that schema). The logic reuses the `varstr_levenshtein_less_equal` function similar to how the column matching is done.
If there's a tie in the fuzzy match, it's solved by preferring the schema that appears first on the search path. If that fails, then the lexicographic order is used to break the tie.
One problem is that scanning all pg_class entries can get expensive on big catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on what would be a good number is appreciated. Personally I've seen a catalog that contains 125K tables, with mostly auto generated names. For these cases I don't think the hint helps that much anyway, so it seemed fine to bail here.
The changes are split into two commits, one refactoring some reusable functions for easier review and another one implementing the relation hint.
Any feedback is welcomed.
Best regards,
Steve Chavez[1]:
Hi! I did not reviewed this patch closely, but I stopped a this:
+ reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname), + candidate, candidatelen, + 1, 1, 1, + Min(fuzzystate->distance + 1, MAX_FUZZY_DISTANCE + 1), + true); + + /* The above can return MAX_FUZZY_DISTANCE + 1 results, skip these */ + if (reldistance > MAX_FUZZY_DISTANCE) + continue;
Why do we even do this? Can't we just pass fuzzystate->distance to
varstr_levenshtein_less_equal? It is initialized in outer func to
MAX_FUZZY_DISTANCE + 1
--
Best regards,
Kirill Reshke
On 2 Dec 2025, at 03:46, Steve Chavez <steve@supabase.io> wrote:
Currently misspelled columns offer a hint but not misspelled relations.
The tab-completion in psql is one tool offered to avoid misspellings which
reduce the need.
One problem is that scanning all pg_class entries can get expensive on big catalogs, so the number of searches is capped by MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on what would be a good number is appreciated. Personally I've seen a catalog that contains 125K tables, with mostly auto generated names. For these cases I don't think the hint helps that much anyway, so it seemed fine to bail here.
What makes hints for columns appealing is that it's a pretty contained problem
across data we've already accumulated, relations are quite different as they
require a catalog lookup making it a lot less appealing. The number of
relations can easily become quite large, especially when large partitioning
hierarchies are involved, and while capping prevents large scans there is no
guarantee that the MAX_REL_HINT_CANDIDATES set contains the most likely
entries. Worst case is that it never contains the a close enough match and
we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.
Are there ways you can pare down the scan, perhaps a scankey on relispartition
and only consider base relations?
Hinting on relations can also give hints for relations the user does not have
permissions on which further reduce the useability.
--
Daniel Gustafsson
Hi Kirill and Daniel, many thanks for your feedback.
Why do we even do this? Can't we just pass fuzzystate->distance to
varstr_levenshtein_less_equal? It is initialized in outer func to
MAX_FUZZY_DISTANCE + 1
This was an oversight. I've corrected it on the new attached patch.
Worst case is that it never contains the a close enough match and
we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.
Are there ways you can pare down the scan, perhaps a scankey on
relispartition
and only consider base relations?
I've improved the code (new patch attached) and the hash is no
longer necessary.
Now the scan omits partitions as you suggested plus they're now restricted
to each schema on the search_path.
It was also possible to do more scans for each relevant relkind
(RELKIND_RELATION, RELKIND_VIEW, etc) but I wasn't sure if that was more
expensive than doing the filtering on the loop as it's currently done.
It also made the code a bit more complicated with more nested loops, so
I've left it as is.
Perhaps we should limit the amount of schema scans too in case the
search_path has too many schemas?
Hinting on relations can also give hints for relations the user does not
have
permissions on which further reduce the useability.
I've tested the current column hint and it also doesn't consider privileges:
```
create role new nosuperuser;
grant usage on schema public to new;
create table subitems (id int, name text);
grant select (id) on subitems to new;
begin;
set local role to new;
select nam from subitems;
2025-12-09 18:16:38.017 -05 [542091] ERROR: column "nam" does not exist at
character 8
2025-12-09 18:16:38.017 -05 [542091] HINT: Perhaps you meant to reference
the column "subitems.name".
2025-12-09 18:16:38.017 -05 [542091] STATEMENT: select nam from subitems ;
ERROR: column "nam" does not exist
LINE 1: select nam from subitems ;
HINT: Perhaps you meant to reference the column "subitems.name".
```
So perhaps that can be a separate enhancement for both hints?
The tab-completion in psql is one tool offered to avoid misspellings which
reduce the need.
True, but it's still useful to offer a hint for other clients.
What do you think?
Best regards,
Steve Chavez
On Tue, 2 Dec 2025 at 04:27, Daniel Gustafsson <daniel@yesql.se> wrote:
Show quoted text
On 2 Dec 2025, at 03:46, Steve Chavez <steve@supabase.io> wrote:
Currently misspelled columns offer a hint but not misspelled relations.
The tab-completion in psql is one tool offered to avoid misspellings which
reduce the need.One problem is that scanning all pg_class entries can get expensive on
big catalogs, so the number of searches is capped by
MAX_REL_HINT_CANDIDATES. I've set this to 4096 arbitrarily, any guidance on
what would be a good number is appreciated. Personally I've seen a catalog
that contains 125K tables, with mostly auto generated names. For these
cases I don't think the hint helps that much anyway, so it seemed fine to
bail here.What makes hints for columns appealing is that it's a pretty contained
problem
across data we've already accumulated, relations are quite different as
they
require a catalog lookup making it a lot less appealing. The number of
relations can easily become quite large, especially when large partitioning
hierarchies are involved, and while capping prevents large scans there is
no
guarantee that the MAX_REL_HINT_CANDIDATES set contains the most likely
entries. Worst case is that it never contains the a close enough match and
we'll spend a scan + hash + calculation of MAX_REL_HINT_CANDIDATES for each
typo.Are there ways you can pare down the scan, perhaps a scankey on
relispartition
and only consider base relations?Hinting on relations can also give hints for relations the user does not
have
permissions on which further reduce the useability.--
Daniel Gustafsson
Attachments:
v2-0001-add-a-hint-for-a-missing-relation.patchtext/x-patch; charset=US-ASCII; name=v2-0001-add-a-hint-for-a-missing-relation.patchDownload
From e9c6abc3793d676d65dacdaecd754d2245b83366 Mon Sep 17 00:00:00 2001
From: steve-chavez <stevechavezast@gmail.com>
Date: Mon, 1 Dec 2025 20:58:46 -0500
Subject: [PATCH v2] add a hint for a missing relation
* remove unneeded MAX_FUZZY_DISTANCE guard
* only scan base relations and not partitions
* constraint scanning to only relevant relnamespaces
---
src/backend/parser/parse_relation.c | 196 ++++++++++++++++++-
src/test/regress/expected/prepared_xacts.out | 2 +
src/test/regress/expected/psql.out | 1 +
src/test/regress/expected/temp.out | 4 +
src/test/regress/expected/transactions.out | 2 +
src/test/regress/expected/xml_1.out | 1 +
6 files changed, 203 insertions(+), 3 deletions(-)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index c1563f1b51b..3cb319ae444 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,11 +15,15 @@
#include "postgres.h"
#include <ctype.h>
+#include <limits.h>
+#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/relation.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "catalog/heap.h"
+#include "catalog/pg_class.h"
#include "catalog/namespace.h"
#include "funcapi.h"
#include "nodes/makefuncs.h"
@@ -30,6 +34,8 @@
#include "parser/parsetree.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
@@ -71,6 +77,24 @@ typedef struct
#define MAX_FUZZY_DISTANCE 3
+#define MAX_REL_HINT_CANDIDATES 4096
+
+/*
+ * Support for fuzzily matching relations for error messages.
+ *
+ * If the relation is schema qualified, relations in that schema are fuzzyly searched.
+ * If the relation is not schema qualified, all the schemas in search_path are fuzzily searched.
+ *
+ * The searches are capped at MAX_REL_HINT_CANDIDATES, to prevent doing too much work
+ * in big catalogs.
+ */
+typedef struct
+{
+ int distance; /* Current distance */
+ int search_path_pos; /* Position of the current schema in search path */
+ char *nspname; /* Schema of current match */
+ char *relname; /* Relation name of current match */
+} FuzzyRelationMatchState;
static ParseNamespaceItem *scanNameSpaceForRefname(ParseState *pstate,
const char *refname,
@@ -102,7 +126,11 @@ static bool rte_visible_if_lateral(ParseState *pstate, RangeTblEntry *rte);
static bool rte_visible_if_qualified(ParseState *pstate, RangeTblEntry *rte);
static inline bool levenshtein_is_absurd(int distance, int matchlen);
-static void errorMissingRelation(ParseState *pstate, const RangeVar *relation);
+static void errorMissingRelation(ParseState *pstate,
+ const RangeVar *relation);
+static List *relhintCandidateSchemas(const RangeVar *relation);
+static bool bestFuzzyRelationMatch(const RangeVar *relation,
+ FuzzyRelationMatchState *match);
/*
* refnameNamespaceItem
@@ -1414,6 +1442,10 @@ buildNSItemFromLists(RangeTblEntry *rte, Index rtindex,
return nsitem;
}
+/*
+ * Generate a suitable error about a missing relation, possibly with hints
+ * about similarly named relations.
+ */
void
errorMissingRelation(ParseState *pstate, const RangeVar *relation)
{
@@ -1432,11 +1464,31 @@ errorMissingRelation(ParseState *pstate, const RangeVar *relation)
relation->relname),
errhint("Use WITH RECURSIVE, or re-order the WITH items to remove forward references.")));
else
+ {
+ bool found_match = false;
+ FuzzyRelationMatchState match = {
+ .distance = MAX_FUZZY_DISTANCE,
+ .search_path_pos = INT_MAX,
+ .nspname = NULL,
+ .relname = NULL,
+ };
+
+ found_match = bestFuzzyRelationMatch(relation, &match);
+
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE),
relation->schemaname ?
- errmsg("relation \"%s.%s\" does not exist", relation->schemaname, relation->relname) :
- errmsg("relation \"%s\" does not exist", relation->relname)));
+ errmsg("relation \"%s.%s\" does not exist",
+ relation->schemaname, relation->relname) :
+ errmsg("relation \"%s\" does not exist",
+ relation->relname),
+ found_match ?
+ errhint("Perhaps you meant to reference the table \"%s\".",
+ quote_qualified_identifier(match.nspname,
+ match.relname)) :
+ 0,
+ parser_errposition(pstate, relation->location)));
+ }
}
/*
@@ -1464,6 +1516,144 @@ parserOpenTable(ParseState *pstate, const RangeVar *relation, int lockmode)
return rel;
}
+/*
+ * Search pg_class for a relation name that's similar to the target.
+ */
+static bool
+bestFuzzyRelationMatch(const RangeVar *target,
+ FuzzyRelationMatchState *fuzzystate)
+{
+ List *candidate_schemas = relhintCandidateSchemas(target);
+ Relation classRel;
+ int checked = 0;
+ int schema_pos = 0;
+
+ if (candidate_schemas == NIL)
+ return false;
+
+ classRel = table_open(RelationRelationId, AccessShareLock);
+
+ foreach_oid(schema, candidate_schemas)
+ {
+ ScanKeyData skey[2];
+ TableScanDesc scan;
+ HeapTuple tuple;
+
+ /* Scan objects on each schema */
+ ScanKeyInit(&skey[0],
+ Anum_pg_class_relnamespace,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(schema));
+
+ /* Don't scan partitions */
+ ScanKeyInit(&skey[1],
+ Anum_pg_class_relispartition,
+ BTEqualStrategyNumber, F_BOOLEQ,
+ BoolGetDatum(false));
+
+ scan = table_beginscan_catalog(classRel, 2, skey);
+
+ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+ const char *candidate = NameStr(classForm->relname);
+ int candidatelen = strlen(candidate);
+ int reldistance;
+
+ /* Only search for relation-like objects */
+ if (!(classForm->relkind == RELKIND_RELATION ||
+ classForm->relkind == RELKIND_PARTITIONED_TABLE ||
+ classForm->relkind == RELKIND_FOREIGN_TABLE ||
+ classForm->relkind == RELKIND_VIEW ||
+ classForm->relkind == RELKIND_MATVIEW))
+ continue;
+
+ /* Keep the scan bounded in very large catalogs. */
+ if (++checked > MAX_REL_HINT_CANDIDATES)
+ {
+ table_endscan(scan);
+ goto done;
+ }
+
+ /*
+ * Only obtain new distances lower or equal than the capped current distance.
+ * Equal distances are relevant because the tie is solved below.
+ */
+ reldistance = varstr_levenshtein_less_equal(target->relname, strlen(target->relname),
+ candidate, candidatelen,
+ 1, 1, 1,
+ fuzzystate->distance,
+ true);
+
+ if (levenshtein_is_absurd(reldistance, candidatelen))
+ continue;
+
+ /*
+ * If the new distance is less than the match found so far, update
+ * fuzzystate. If the distance is equal, prefer the match with the
+ * lowest search path position. If the search position is equal, use
+ * the lexicographic order to solve the tie, this also ensures the
+ * hint is stable across runs since the heap scan order is
+ * nondeterministic.
+ */
+ if (reldistance < fuzzystate->distance ||
+ (reldistance == fuzzystate->distance &&
+ schema_pos < fuzzystate->search_path_pos) ||
+ (reldistance == fuzzystate->distance &&
+ schema_pos == fuzzystate->search_path_pos &&
+ strcmp(candidate, fuzzystate->relname) < 0))
+ {
+ if (fuzzystate->relname)
+ pfree(fuzzystate->relname);
+ if (fuzzystate->nspname)
+ pfree(fuzzystate->nspname);
+
+ fuzzystate->distance = reldistance;
+ fuzzystate->search_path_pos = schema_pos;
+ fuzzystate->relname = pstrdup(candidate);
+ fuzzystate->nspname = get_namespace_name(classForm->relnamespace);
+ }
+ }
+
+ table_endscan(scan);
+ schema_pos++;
+ }
+
+done:
+ table_close(classRel, AccessShareLock);
+ list_free(candidate_schemas);
+
+ /* Return true if there was a match */
+ return fuzzystate->relname != NULL;
+}
+
+/*
+ * Candidate schemas for the relation hint.
+ *
+ * If the relation was schema-qualified we'll only search that schema
+ * (considering pg_temp), otherwise consider search_path.
+ */
+static List *
+relhintCandidateSchemas(const RangeVar *relation)
+{
+ if (relation->schemaname)
+ {
+ Oid namespaceId = LookupNamespaceNoError(relation->schemaname);
+
+ return OidIsValid(namespaceId) ? list_make1_oid(namespaceId) : NIL;
+ }
+ else if (relation->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ Oid tempNamespace;
+
+ GetTempNamespaceState(&tempNamespace, NULL);
+ return OidIsValid(tempNamespace) ? list_make1_oid(tempNamespace) : NIL;
+ }
+ else
+ return fetch_search_path(true);
+}
+
+
/*
* Add an entry for a relation to the pstate's range table (p_rtable).
* Then, construct and return a ParseNamespaceItem for the new RTE.
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
index 515a2ada9d1..47b1f6920fb 100644
--- a/src/test/regress/expected/prepared_xacts.out
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -201,6 +201,7 @@ SELECT * FROM pxtest2;
ERROR: relation "pxtest2" does not exist
LINE 1: SELECT * FROM pxtest2;
^
+HINT: Perhaps you meant to reference the table "public.pxtest3".
-- There should be two prepared transactions
SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
gid
@@ -257,6 +258,7 @@ SELECT * FROM pxtest3;
ERROR: relation "pxtest3" does not exist
LINE 1: SELECT * FROM pxtest3;
^
+HINT: Perhaps you meant to reference the table "public.pxtest2".
-- There should be no prepared transactions
SELECT gid FROM pg_prepared_xacts WHERE gid ~ '^regress_' ORDER BY gid;
gid
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index c8f3932edf0..6b8f133aa7d 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5633,6 +5633,7 @@ SELECT * FROM ac_test; -- should be gone now
ERROR: relation "ac_test" does not exist
LINE 1: SELECT * FROM ac_test;
^
+HINT: Perhaps you meant to reference the table "public.aggtest".
-- ON_ERROR_ROLLBACK
\set ON_ERROR_ROLLBACK on
CREATE TABLE oer_test (a int);
diff --git a/src/test/regress/expected/temp.out b/src/test/regress/expected/temp.out
index a50c7ae88a9..63e62028d56 100644
--- a/src/test/regress/expected/temp.out
+++ b/src/test/regress/expected/temp.out
@@ -47,6 +47,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- Test ON COMMIT DELETE ROWS
CREATE TEMP TABLE temptest(col int) ON COMMIT DELETE ROWS;
-- while we're here, verify successful truncation of index with SQL function
@@ -100,6 +101,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
BEGIN;
CREATE TEMP TABLE temptest(col) ON COMMIT DROP AS SELECT 1;
SELECT * FROM temptest;
@@ -113,6 +115,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- Test it with a CHECK condition that produces a toasted pg_constraint entry
BEGIN;
do $$
@@ -133,6 +136,7 @@ SELECT * FROM temptest;
ERROR: relation "temptest" does not exist
LINE 1: SELECT * FROM temptest;
^
+HINT: Perhaps you meant to reference the table "public.xmltest".
-- ON COMMIT is only allowed for TEMP
CREATE TABLE temptest(col int) ON COMMIT DELETE ROWS;
ERROR: ON COMMIT can only be used on temporary tables
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index 7f5757e89c4..2d89fb791c5 100644
--- a/src/test/regress/expected/transactions.out
+++ b/src/test/regress/expected/transactions.out
@@ -208,6 +208,7 @@ SELECT * FROM trans_bar; -- shouldn't exist
ERROR: relation "trans_bar" does not exist
LINE 1: SELECT * FROM trans_bar;
^
+HINT: Perhaps you meant to reference the table "public.trans_baz".
SELECT * FROM trans_barbaz; -- should be empty
a
---
@@ -226,6 +227,7 @@ BEGIN;
ERROR: relation "trans_bar" does not exist
LINE 1: INSERT into trans_bar VALUES (1);
^
+HINT: Perhaps you meant to reference the table "public.trans_baz".
ROLLBACK TO one;
RELEASE SAVEPOINT one;
SAVEPOINT two;
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 73c411118a3..0514b8dd92b 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -1072,6 +1072,7 @@ SELECT * FROM xmltableview2;
ERROR: relation "xmltableview2" does not exist
LINE 1: SELECT * FROM xmltableview2;
^
+HINT: Perhaps you meant to reference the table "public.xmltableview1".
\sv xmltableview2
ERROR: relation "xmltableview2" does not exist
SELECT * FROM XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'),
--
2.42.0