Allow deleting enum value
Hi,
There is a question related to TODO task with name "Allow deleting enumerated values from an existing enumerated data type".
I made simple changes in parser and implement RemoveEnumLabel function, but as I understand if enum value that we want to
delete is in use by some tables, we have to prevent deletion to avoid inconsistency.
Could you please provide some references where similar functionality was implemented ? Thanks.
Attached basic patch without handling dependencies.
Example of problem that I mention using basic implementation:
postgres=# CREATE TYPE enum_test as ENUM ('1', '2', '3');
CREATE TYPE
postgres=# CREATE TYPE test_enum as ENUM ('1', '2', '3');
CREATE TYPE
postgres=# CREATE TABLE test_table (value test_enum);
CREATE TABLE
postgres=# INSERT INTO test_table VALUES ('1'), ('2');
INSERT 0 2
postgres=# ALTER TYPE test_enum DELETE VALUE '2';
ALTER TYPE
postgres=# SELECT enum_range(NULL::test_enum);
enum_range
------------
{1,3}
(1 row)
postgres=# SELECT * FROM test_table;
ERROR: invalid internal value for enum: 16396
Best regards,
Maksim Kita
Attachments:
0001-Allow-alter-type-delete-value-in-enum.patchtext/x-diff; charset=us-asciiDownload
From 2ae09254584340b7700e31680d752822a782be75 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 7 Oct 2020 17:18:01 +0300
Subject: [PATCH] Allow alter type delete value in enum
---
src/backend/catalog/pg_enum.c | 60 +++++++++++++++++++++++++++++++++
src/backend/commands/typecmds.c | 8 +++--
src/backend/parser/gram.y | 11 ++++++
src/include/catalog/pg_enum.h | 1 +
4 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/src/backend/catalog/pg_enum.c b/src/backend/catalog/pg_enum.c
index 27e4100a6f..c547dfd22c 100644
--- a/src/backend/catalog/pg_enum.c
+++ b/src/backend/catalog/pg_enum.c
@@ -583,6 +583,66 @@ RenameEnumLabel(Oid enumTypeOid,
table_close(pg_enum, RowExclusiveLock);
}
+extern void RemoveEnumLabel(Oid enumTypeOid, const char *val)
+{
+ Relation pg_enum;
+ HeapTuple enum_tup;
+ Form_pg_enum en;
+ CatCList *list;
+ int nelems;
+ HeapTuple old_tup;
+ int i;
+
+ /* check length of new label is ok */
+ if (strlen(val) > (NAMEDATALEN - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid enum label \"%s\"", val),
+ errdetail("Labels must be %d characters or less.",
+ NAMEDATALEN - 1)));
+
+ /*
+ * Acquire a lock on the enum type, which we won't release until commit.
+ * This ensures that two backends aren't concurrently modifying the same
+ * enum type. Since we are not changing the type's sort order, this is
+ * probably not really necessary, but there seems no reason not to take
+ * the lock to be sure.
+ */
+ LockDatabaseObject(TypeRelationId, enumTypeOid, 0, ExclusiveLock);
+
+ pg_enum = table_open(EnumRelationId, RowExclusiveLock);
+
+ /* Get the list of existing members of the enum */
+ list = SearchSysCacheList1(ENUMTYPOIDNAME,
+ ObjectIdGetDatum(enumTypeOid));
+ nelems = list->n_members;
+
+ /*
+ * Check if the label is in use. (The unique index on pg_enum would catch that anyway, but we
+ * prefer a friendlier error message.)
+ */
+ for (i = 0; i < nelems; i++)
+ {
+ enum_tup = &(list->members[i]->tuple);
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+ if (strcmp(NameStr(en->enumlabel), val) == 0)
+ {
+ old_tup = enum_tup;
+ break;
+ }
+ }
+ if (!old_tup)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"%s\" is not an existing enum label",
+ val)));
+
+ ReleaseCatCacheList(list);
+
+ CatalogTupleDelete(pg_enum, &old_tup->t_self);
+
+ table_close(pg_enum, RowExclusiveLock);
+}
/*
* Test if the given enum value is on the blacklist
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 483bb65ddc..b9b3e18a94 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -1232,13 +1232,17 @@ AlterEnum(AlterEnumStmt *stmt)
ReleaseSysCache(tup);
- if (stmt->oldVal)
+ if (stmt->oldVal && stmt->newVal)
{
/* Rename an existing label */
RenameEnumLabel(enum_type_oid, stmt->oldVal, stmt->newVal);
}
- else
+ else if (stmt->oldVal)
{
+ /* Delete an existing label */
+ RemoveEnumLabel(enum_type_oid, stmt->oldVal);
+ }
+ else {
/* Add a new label */
AddEnumLabel(enum_type_oid, stmt->newVal,
stmt->newValNeighbor, stmt->newValIsAfter,
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0d101d8171..82dd5147fa 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -5792,6 +5792,17 @@ AlterEnumStmt:
n->skipIfNewValExists = false;
$$ = (Node *) n;
}
+ | ALTER TYPE_P any_name DELETE_P VALUE_P Sconst
+ {
+ AlterEnumStmt *n = makeNode(AlterEnumStmt);
+ n->typeName = $3;
+ n->oldVal = $6;
+ n->newVal = NULL;
+ n->newValNeighbor = NULL;
+ n->newValIsAfter = false;
+ n->skipIfNewValExists = false;
+ $$ = (Node *) n;
+ }
;
opt_if_not_exists: IF_P NOT EXISTS { $$ = true; }
diff --git a/src/include/catalog/pg_enum.h b/src/include/catalog/pg_enum.h
index b28d441ba7..3dd81cd892 100644
--- a/src/include/catalog/pg_enum.h
+++ b/src/include/catalog/pg_enum.h
@@ -53,6 +53,7 @@ extern void AddEnumLabel(Oid enumTypeOid, const char *newVal,
bool skipIfExists);
extern void RenameEnumLabel(Oid enumTypeOid,
const char *oldVal, const char *newVal);
+extern void RemoveEnumLabel(Oid enumTypeOid, const char *val);
extern bool EnumBlacklisted(Oid enum_id);
extern Size EstimateEnumBlacklistSpace(void);
extern void SerializeEnumBlacklist(void *space, Size size);
--
2.25.1
Maksim Kita <kitaetoya@gmail.com> writes:
There is a question related to TODO task with name "Allow deleting enumerated values from an existing enumerated data type".
I made simple changes in parser and implement RemoveEnumLabel function, but as I understand if enum value that we want to
delete is in use by some tables, we have to prevent deletion to avoid inconsistency.
It's a lot worse than that. Even if you could ensure that the value is no
longer present in any tables (which you cannot really, because of race
conditions), that is not sufficient to ensure that it's not present in any
indexes. For example, if a specific value has managed to work its way up
into the upper levels of a btree index, it's basically never going to
disappear short of a full REINDEX. So we have to keep around sufficient
information to allow it to be compared correctly.
That TODO item should either be removed or marked with a warning stating
that it's next door to impossible. (Unfortunately, a lot of our TODO
items are like that ... there's usually a good reason why they're not
done already.)
regards, tom lane
I wrote:
That TODO item should either be removed or marked with a warning stating
that it's next door to impossible. (Unfortunately, a lot of our TODO
items are like that ... there's usually a good reason why they're not
done already.)
Actually ... I'm not sure if this idea has been discussed before, but
what if we redefined what "delete" means? We could add an "isdropped"
column to pg_enum, and have DROP do nothing but set that flag, and
modify enum_in to reject such values. Then comparisons still work,
and whether there are remaining instances of the value is the
user's problem not ours.
pg_dump and pg_upgrade would need to jump through some hoops to
deal with this, but it's not any harder than a lot of other
weird cases they deal with.
regards, tom lane
I like the idea, during prototype I added additional column and modified
enum_in method. But enum_in is called in contexts that can be important
for user (like comparisons).
Example:
postgres=# CREATE TYPE test_enum AS enum ('1', '2', '3');
CREATE TYPE
postgres=# CREATE TABLE test_table ( value test_enum );
postgres=# INSERT INTO test_table VALUES ('1'), ('2'), ('3');
INSERT 0 3
postgres=# ALTER TYPE test_enum DELETE VALUE '2';
ALTER TYPE
postgres=# INSERT INTO test_table VALUES ('2');
ERROR: enum value is dropped test_enum: "2"
LINE 1: INSERT INTO test_table VALUES ('2');
postgres=# SELECT * FROM test_table WHERE value = '2';
ERROR: enum value is dropped test_enum: "2"
LINE 1: SELECT * FROM test_table WHERE value = '2';
postgres=# UPDATE test_table SET value = '3' WHERE value = '2';
ERROR: enum value is dropped test_enum: "2"
LINE 1: UPDATE test_table SET value = '3' WHERE value = '2';
Probably we need to make more specific change for enum type to prevent
using of dropped column in context of insert or update (where we
creating dropped enum value), but not in others.
Is that possible ? What places should I look into ? Thanks.
Best regards,
Maksim Kita
Attachments:
0001-Allow-alter-type-delete-value-in-enum.patchtext/x-diff; charset=us-asciiDownload
From f829ae210fc0f6a010cba9c9e4e7772a9b59ae37 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 7 Oct 2020 23:35:56 +0300
Subject: [PATCH] Allow alter type delete value in enum
---
src/backend/catalog/pg_enum.c | 69 +++++++++++++++++++++++++++++++++
src/backend/commands/typecmds.c | 8 +++-
src/backend/parser/gram.y | 11 ++++++
src/backend/utils/adt/enum.c | 10 +++++
src/include/catalog/pg_enum.h | 2 +
5 files changed, 98 insertions(+), 2 deletions(-)
diff --git a/src/backend/catalog/pg_enum.c b/src/backend/catalog/pg_enum.c
index 27e4100a6f..907db93cf4 100644
--- a/src/backend/catalog/pg_enum.c
+++ b/src/backend/catalog/pg_enum.c
@@ -133,6 +133,7 @@ EnumValuesCreate(Oid enumTypeOid, List *vals)
values[Anum_pg_enum_enumsortorder - 1] = Float4GetDatum(elemno + 1);
namestrcpy(&enumlabel, lab);
values[Anum_pg_enum_enumlabel - 1] = NameGetDatum(&enumlabel);
+ values[Anum_pg_enum_isdropped - 1] = false;
tup = heap_form_tuple(RelationGetDescr(pg_enum), values, nulls);
@@ -485,6 +486,7 @@ restart:
values[Anum_pg_enum_enumsortorder - 1] = Float4GetDatum(newelemorder);
namestrcpy(&enumlabel, newVal);
values[Anum_pg_enum_enumlabel - 1] = NameGetDatum(&enumlabel);
+ values[Anum_pg_enum_isdropped - 1] = false;
enum_tup = heap_form_tuple(RelationGetDescr(pg_enum), values, nulls);
CatalogTupleInsert(pg_enum, enum_tup);
heap_freetuple(enum_tup);
@@ -583,6 +585,73 @@ RenameEnumLabel(Oid enumTypeOid,
table_close(pg_enum, RowExclusiveLock);
}
+extern void RemoveEnumLabel(Oid enumTypeOid, const char *val)
+{
+ Relation pg_enum;
+ HeapTuple enum_tup;
+ Form_pg_enum en;
+ CatCList *list;
+ int nelems;
+ HeapTuple old_tup;
+ int i;
+
+ /* check length of new label is ok */
+ if (strlen(val) > (NAMEDATALEN - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid enum label \"%s\"", val),
+ errdetail("Labels must be %d characters or less.",
+ NAMEDATALEN - 1)));
+
+ /*
+ * Acquire a lock on the enum type, which we won't release until commit.
+ * This ensures that two backends aren't concurrently modifying the same
+ * enum type. Since we are not changing the type's sort order, this is
+ * probably not really necessary, but there seems no reason not to take
+ * the lock to be sure.
+ */
+ LockDatabaseObject(TypeRelationId, enumTypeOid, 0, ExclusiveLock);
+
+ pg_enum = table_open(EnumRelationId, RowExclusiveLock);
+
+ /* Get the list of existing members of the enum */
+ list = SearchSysCacheList1(ENUMTYPOIDNAME,
+ ObjectIdGetDatum(enumTypeOid));
+ nelems = list->n_members;
+
+ /*
+ * Check if the label is in use. (The unique index on pg_enum would catch that anyway, but we
+ * prefer a friendlier error message.)
+ */
+ for (i = 0; i < nelems; i++)
+ {
+ enum_tup = &(list->members[i]->tuple);
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+ if (strcmp(NameStr(en->enumlabel), val) == 0)
+ {
+ old_tup = enum_tup;
+ break;
+ }
+ }
+ if (!old_tup)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"%s\" is not an existing enum label",
+ val)));
+
+ /* OK, make a writable copy of old tuple */
+ enum_tup = heap_copytuple(old_tup);
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+
+ ReleaseCatCacheList(list);
+
+ /* Update the pg_enum entry */
+ en->isdropped = true;
+ CatalogTupleUpdate(pg_enum, &enum_tup->t_self, enum_tup);
+ heap_freetuple(enum_tup);
+
+ table_close(pg_enum, RowExclusiveLock);
+}
/*
* Test if the given enum value is on the blacklist
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 483bb65ddc..b9b3e18a94 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -1232,13 +1232,17 @@ AlterEnum(AlterEnumStmt *stmt)
ReleaseSysCache(tup);
- if (stmt->oldVal)
+ if (stmt->oldVal && stmt->newVal)
{
/* Rename an existing label */
RenameEnumLabel(enum_type_oid, stmt->oldVal, stmt->newVal);
}
- else
+ else if (stmt->oldVal)
{
+ /* Delete an existing label */
+ RemoveEnumLabel(enum_type_oid, stmt->oldVal);
+ }
+ else {
/* Add a new label */
AddEnumLabel(enum_type_oid, stmt->newVal,
stmt->newValNeighbor, stmt->newValIsAfter,
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0d101d8171..82dd5147fa 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -5792,6 +5792,17 @@ AlterEnumStmt:
n->skipIfNewValExists = false;
$$ = (Node *) n;
}
+ | ALTER TYPE_P any_name DELETE_P VALUE_P Sconst
+ {
+ AlterEnumStmt *n = makeNode(AlterEnumStmt);
+ n->typeName = $3;
+ n->oldVal = $6;
+ n->newVal = NULL;
+ n->newValNeighbor = NULL;
+ n->newValIsAfter = false;
+ n->skipIfNewValExists = false;
+ $$ = (Node *) n;
+ }
;
opt_if_not_exists: IF_P NOT EXISTS { $$ = true; }
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
index 5ead794e34..397c02fa69 100644
--- a/src/backend/utils/adt/enum.c
+++ b/src/backend/utils/adt/enum.c
@@ -113,6 +113,7 @@ enum_in(PG_FUNCTION_ARGS)
Oid enumtypoid = PG_GETARG_OID(1);
Oid enumoid;
HeapTuple tup;
+ Form_pg_enum en;
/* must check length to prevent Assert failure within SearchSysCache */
if (strlen(name) >= NAMEDATALEN)
@@ -132,6 +133,15 @@ enum_in(PG_FUNCTION_ARGS)
format_type_be(enumtypoid),
name)));
+ en = (Form_pg_enum) GETSTRUCT(tup);
+
+ if (en->isdropped)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("enum value is dropped %s: \"%s\"",
+ format_type_be(enumtypoid),
+ name)));
+
/* check it's safe to use in SQL */
check_safe_enum_use(tup);
diff --git a/src/include/catalog/pg_enum.h b/src/include/catalog/pg_enum.h
index b28d441ba7..6a7b00ffbe 100644
--- a/src/include/catalog/pg_enum.h
+++ b/src/include/catalog/pg_enum.h
@@ -34,6 +34,7 @@ CATALOG(pg_enum,3501,EnumRelationId)
Oid enumtypid; /* OID of owning enum type */
float4 enumsortorder; /* sort position of this enum value */
NameData enumlabel; /* text representation of enum value */
+ bool isdropped; /* is enum value dropped */
} FormData_pg_enum;
/* ----------------
@@ -53,6 +54,7 @@ extern void AddEnumLabel(Oid enumTypeOid, const char *newVal,
bool skipIfExists);
extern void RenameEnumLabel(Oid enumTypeOid,
const char *oldVal, const char *newVal);
+extern void RemoveEnumLabel(Oid enumTypeOid, const char *val);
extern bool EnumBlacklisted(Oid enum_id);
extern Size EstimateEnumBlacklistSpace(void);
extern void SerializeEnumBlacklist(void *space, Size size);
--
2.25.1
On Wed, Oct 07, 2020 at 11:47:07PM +0300, Maksim Kita wrote:
I like the idea, during prototype I added additional column and modified
enum_in method. But enum_in is called in contexts that can be important
for user (like comparisons).
...
postgres=# ALTER TYPE test_enum DELETE VALUE '2';
ALTER TYPE
I think it should be called "DROP VALUE"
postgres=# SELECT * FROM test_table WHERE value = '2';
ERROR: enum value is dropped test_enum: "2"
LINE 1: SELECT * FROM test_table WHERE value = '2';
This fails if the value was never added, so why wouldn't it also fail if the
value is added and then removed ?
Maybe you'd want to rename old enum names to something "unlikely", like what we
do for dropped attributes in RemoveAttributeById.
How do you want to handle "adding a value back" ?
I think you should determine/propose/discuss the desired behaviors before
implementing it.
I think you'd also need to update these:
src/bin/pg_dump/pg_dump.c
src/bin/psql/describe.c
src/bin/psql/tab-complete.c
doc/src/sgml/catalogs.sgml
src/test/regress/sql/enum.sql
src/test/regress/expected/enum.out
--
Justin