Fix to enum hashing for dump and restore

Started by Andrewalmost 3 years ago3 messages
#1Andrew
pgsqlhackers@andrewrepp.com
1 attachment(s)

Hello,

I have discovered a bug in one usage of enums. If a table with hash
partitions uses an enum as a partitioning key, it can no longer be
backed up and restored correctly. This is because enums are represented
simply as oids, and the hash function for enums hashes that oid to
determine partition distribution. Given the way oids are assigned, any
dump+restore of a database with such a table may fail with the error
"ERROR: new row for relation "TABLENAME" violates partition constraint".

This can be reproduced with the following steps:
************************************************
create database test;
\c test
create type colors as enum ('red', 'green', 'blue', 'yellow');
create table part (color colors) partition by hash(color);

create table prt_0 partition of part for values
with (modulus 3, remainder 0);

create table prt_1 partition of part for values
with (modulus 3, remainder 1);

create table prt_2 partition of part for values
with (modulus 3, remainder 2);
insert into part values ('red');

/usr/local/pgsql/bin/pg_dump -d test -f /tmp/dump.sql
/usr/local/pgsql/bin/createdb test2
/usr/local/pgsql/bin/psql test2 -f /tmp/dump.sql
************************************************

I have written a patch to fix this bug (attached), by instead having the
hashenum functions look up the enumsortorder ID of the value being
hashed. These are deterministic across databases, and so allow for
stable dump and restore. This admittedly comes at the performance cost
of doing a catalog lookup, but there is precedent for this in
e.g. hashrange and hashtext.

I look forward to your feedback on this, thank you!

Sincerely,
Andrew J Repp (VMware)

Attachments:

v1-0001-Change-enum-hashing-to-consider-enumsortorder.patchapplication/octet-stream; name="=?UTF-8?Q?v1-0001-Change-enum-hashing-to-consider-enumsortorder.patch?="Download
From 9f0909cd6fa8721ea870afdbee3bb183226065b9 Mon Sep 17 00:00:00 2001
From: Andrew Repp <reppa@vmware.com>
Date: Tue, 24 Jan 2023 20:42:17 -0600
Subject: [PATCH v1 1/1] Change enum hashing to consider enumsortorder

---
 src/backend/access/hash/hashfunc.c | 25 +++++++++++++++++++--
 src/backend/utils/cache/typcache.c | 36 ++++++++++++++++++++++++++++++
 src/include/utils/typcache.h       |  2 ++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index e3e40d6c21..50728362b6 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -32,6 +32,7 @@
 #include "utils/builtins.h"
 #include "utils/float.h"
 #include "utils/pg_locale.h"
+#include "utils/typcache.h"
 #include "varatt.h"
 
 /*
@@ -129,13 +130,33 @@ hashoidextended(PG_FUNCTION_ARGS)
 Datum
 hashenum(PG_FUNCTION_ARGS)
 {
-	return hash_uint32((uint32) PG_GETARG_OID(0));
+	/*extract the oid of the enum we're hashing*/
+	uint32 enum_oid = (uint32) PG_GETARG_OID(0);
+	float4 enum_sort_order = extract_enum_sort_order(enum_oid);
+
+	/*
+	 * Maintain consistent approach with hashfloat4 by casting the float4 enum
+	 * sort order to a float8, and hashing that.
+	 */
+	float8 key8 = enum_sort_order;
+	return hash_any((unsigned char *) &key8, sizeof(key8));
 }
 
 Datum
 hashenumextended(PG_FUNCTION_ARGS)
 {
-	return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
+	/*extract the oid of the enum we're hashing*/
+	uint32 enum_oid = (uint32) PG_GETARG_OID(0);
+	uint64 seed = PG_GETARG_INT64(1);
+	float4 enum_sort_order = extract_enum_sort_order(enum_oid);
+
+	/*
+	 * Maintain consistent approach with hashfloat4extended by casting the float4 enum
+	 * sort order to a float8, and hashing that with seed value.
+	 */
+	float8 key8 = enum_sort_order;
+
+	return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
 }
 
 Datum
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 4a3e0fdb7f..eb554a0a5f 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -2876,3 +2876,39 @@ shared_record_typmod_registry_detach(dsm_segment *segment, Datum datum)
 	}
 	CurrentSession->shared_typmod_registry = NULL;
 }
+
+float4
+extract_enum_sort_order(uint32 enum_oid)
+{
+	Form_pg_enum en;
+	TypeCacheEntry *tcache;
+	Oid typeoid;
+	TypeCacheEnumData *enumdata;
+	EnumItem *enum_obj;
+
+	/* Get the OID of the enum type containing our enum */
+	HeapTuple enum_tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(enum_oid));
+	if (!HeapTupleIsValid(enum_tup)){
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+					errmsg("invalid internal value for enum: %u",
+						enum_oid)));
+	}
+	en = (Form_pg_enum) GETSTRUCT(enum_tup);
+	typeoid = en->enumtypid;
+	ReleaseSysCache(enum_tup);
+
+	/* extract sort order from our enum */
+	tcache = lookup_type_cache(typeoid, 0);
+	if (tcache->enumData == NULL){
+		load_enum_cache_data(tcache);
+	}
+	enumdata = tcache->enumData;
+	enum_obj = find_enumitem(enumdata, enum_oid);
+	if (enum_obj == NULL){
+		elog(ERROR, "enum value %u not found in cache for enum %s",
+				enum_oid, format_type_be(tcache->type_id));
+	}
+
+	return enum_obj->sort_order;
+}
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index 95f3a9ee30..e7af7d4942 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -199,6 +199,8 @@ extern uint64 assign_record_type_identifier(Oid type_id, int32 typmod);
 
 extern int	compare_values_of_enum(TypeCacheEntry *tcache, Oid arg1, Oid arg2);
 
+extern float4 extract_enum_sort_order(uint32 enum_oid);
+
 extern size_t SharedRecordTypmodRegistryEstimate(void);
 
 extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *,
-- 
2.25.1

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Andrew (#1)
Re: Fix to enum hashing for dump and restore

Andrew <pgsqlhackers@andrewrepp.com> writes:

I have discovered a bug in one usage of enums. If a table with hash
partitions uses an enum as a partitioning key, it can no longer be
backed up and restored correctly. This is because enums are represented
simply as oids, and the hash function for enums hashes that oid to
determine partition distribution. Given the way oids are assigned, any
dump+restore of a database with such a table may fail with the error
"ERROR: new row for relation "TABLENAME" violates partition constraint".

Ugh, that was not well thought out :-(. I suppose this isn't a problem
for pg_upgrade, which should preserve the enum value OIDs, but an
ordinary dump/restore will indeed hit this.

I have written a patch to fix this bug (attached), by instead having the
hashenum functions look up the enumsortorder ID of the value being
hashed. These are deterministic across databases, and so allow for
stable dump and restore.

Unfortunately, I'm not sure those are as deterministic as all that.
They are floats, so there's a question of roundoff error, not to
mention cross-platform variations in what a float looks like. (At the
absolute minimum, I think we'd have to change your patch to force
consistent byte ordering of the floats.) Actually though, roundoff
error wouldn't be a problem for the normal exact-integer values of
enumsortorder. Where it could come into play is with the fractional
values used after you insert a value into the existing sort order.
And then the whole idea fails, because a dump/restore won't duplicate
those fractional values.

Another problem with this approach is that we can't get from here to there
without a guaranteed dump/reload failure, since it's quite unlikely that
the partition assignment will be the same when based on enumsortorder
as it was when based on OIDs. Worse, it also breaks the pg_upgrade case.

I wonder if it'd work to make pg_dump force --load-via-partition-root
mode when a hashed partition key includes an enum.

regards, tom lane

#3Andrew
pgsqlhackers@andrewrepp.com
In reply to: Tom Lane (#2)
Re: Fix to enum hashing for dump and restore

Those are excellent points.
We will investigate adjusting pg_dump behavior,
as this is primarily a dump+restore issue.

Thank you!

-Andrew J Repp (VMware)

Show quoted text

On Tue, Jan 24, 2023, at 9:56 PM, Tom Lane wrote:

Andrew <pgsqlhackers@andrewrepp.com> writes:

I have discovered a bug in one usage of enums. If a table with hash
partitions uses an enum as a partitioning key, it can no longer be
backed up and restored correctly. This is because enums are represented
simply as oids, and the hash function for enums hashes that oid to
determine partition distribution. Given the way oids are assigned, any
dump+restore of a database with such a table may fail with the error
"ERROR: new row for relation "TABLENAME" violates partition constraint".

Ugh, that was not well thought out :-(. I suppose this isn't a problem
for pg_upgrade, which should preserve the enum value OIDs, but an
ordinary dump/restore will indeed hit this.

I have written a patch to fix this bug (attached), by instead having the
hashenum functions look up the enumsortorder ID of the value being
hashed. These are deterministic across databases, and so allow for
stable dump and restore.

Unfortunately, I'm not sure those are as deterministic as all that.
They are floats, so there's a question of roundoff error, not to
mention cross-platform variations in what a float looks like. (At the
absolute minimum, I think we'd have to change your patch to force
consistent byte ordering of the floats.) Actually though, roundoff
error wouldn't be a problem for the normal exact-integer values of
enumsortorder. Where it could come into play is with the fractional
values used after you insert a value into the existing sort order.
And then the whole idea fails, because a dump/restore won't duplicate
those fractional values.

Another problem with this approach is that we can't get from here to there
without a guaranteed dump/reload failure, since it's quite unlikely that
the partition assignment will be the same when based on enumsortorder
as it was when based on OIDs. Worse, it also breaks the pg_upgrade case.

I wonder if it'd work to make pg_dump force --load-via-partition-root
mode when a hashed partition key includes an enum.

regards, tom lane