From 2a7d233ea36f3ba552b32e527c5e41998c7e0baa Mon Sep 17 00:00:00 2001 From: David Rowley Date: Thu, 23 Mar 2023 17:34:52 +1300 Subject: [PATCH v1 1/2] Work around name / cstring problem in datum_image_eq Indexes using name_ops store name types as cstrings. When performing an Index Only Scan on such an index, name values will arrive as cstrings which means they're not padded out to the full NAMEDATALEN bytes. This caused problems in datum_image_eq and datum_image_hash as if the Datum came from an Index Only Scan, we'd try to look at the full 64 bytes. If the Datum being compared to didn't come from an Index Only Scan then the Datums may appear not equal when they actually are equal. The same problem existed in datum_image_hash. Here we add new versions of datum_image_eq and datum_image_hash which accept FormData_pg_attribute and special case NAMEOID so that we always compare those as cstrings. This is pretty dirty, but it seems no worse than what's mentioned in btrescan(). Bug: #17855 Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/17855-5f523e0f9769a566@postgresql.org --- src/backend/access/nbtree/nbtutils.c | 2 +- src/backend/executor/nodeMemoize.c | 7 +- src/backend/utils/adt/datum.c | 155 ++++++++++++++++++++++++++- src/backend/utils/adt/ri_triggers.c | 2 +- src/backend/utils/adt/rowtypes.c | 2 +- src/include/utils/datum.h | 24 ++++- 6 files changed, 182 insertions(+), 10 deletions(-) diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 7da499c4dd..f19094b89a 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2444,7 +2444,7 @@ _bt_keep_natts_fast(Relation rel, IndexTuple lastleft, IndexTuple firstright) break; if (!isNull1 && - !datum_image_eq(datum1, datum2, att->attbyval, att->attlen)) + !datum_image_attr_eq(datum1, datum2, att)) break; keepnatts++; diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c index 4f04269e26..e519dc31d5 100644 --- a/src/backend/executor/nodeMemoize.c +++ b/src/backend/executor/nodeMemoize.c @@ -176,7 +176,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) attr = &pslot->tts_tupleDescriptor->attrs[i]; - hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen); + hkey = datum_image_attr_hash(pslot->tts_values[i], attr); hashkey ^= hkey; } @@ -244,8 +244,9 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, /* perform binary comparison on the two datums */ attr = &tslot->tts_tupleDescriptor->attrs[i]; - if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i], - attr->attbyval, attr->attlen)) + if (!datum_image_attr_eq(tslot->tts_values[i], + pslot->tts_values[i], + attr)) return false; } return true; diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 9f06ee7626..3947120a42 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -43,6 +43,8 @@ #include "postgres.h" #include "access/detoast.h" +#include "catalog/pg_attribute.h" +#include "catalog/pg_type.h" #include "common/hashfn.h" #include "fmgr.h" #include "utils/builtins.h" @@ -256,7 +258,7 @@ datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) } /*------------------------------------------------------------------------- - * datum_image_eq + * datum_image_eq -- Use datum_image_attr_eq instead * * Compares two datums for identical contents, based on byte images. Return * true if the two datums are equal, false otherwise. @@ -326,7 +328,91 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) } /*------------------------------------------------------------------------- - * datum_image_hash + * datum_image_attr_eq + * + * Compares two datums for identical contents, based on byte images. Return + * true if the two datums are equal, false otherwise. + *------------------------------------------------------------------------- + */ +bool +datum_image_attr_eq(Datum value1, Datum value2, FormData_pg_attribute *atttype) +{ + bool typByVal = atttype->attbyval; + int16 typLen; + Size len1, + len2; + bool result = true; + + /* + * Always special case names to compare as cstrings instead of comparing + * NAMEDATALEN bytes. We must do this due to btree name_ops using an + * underlying storage datatype of cstring. If the Datum comes from an + * index only scan, then the Datum won't be padded out with NULs for the + * full NAMEDATALEN bytes. + */ + if (atttype->atttypid == NAMEOID) + typLen = -2; + else + typLen = atttype->attlen; + + if (typByVal) + { + result = (value1 == value2); + } + else if (typLen > 0) + { + result = (memcmp(DatumGetPointer(value1), + DatumGetPointer(value2), + typLen) == 0); + } + else if (typLen == -1) + { + len1 = toast_raw_datum_size(value1); + len2 = toast_raw_datum_size(value2); + /* No need to de-toast if lengths don't match. */ + if (len1 != len2) + result = false; + else + { + struct varlena *arg1val; + struct varlena *arg2val; + + arg1val = PG_DETOAST_DATUM_PACKED(value1); + arg2val = PG_DETOAST_DATUM_PACKED(value2); + + result = (memcmp(VARDATA_ANY(arg1val), + VARDATA_ANY(arg2val), + len1 - VARHDRSZ) == 0); + + /* Only free memory if it's a copy made here. */ + if ((Pointer) arg1val != (Pointer) value1) + pfree(arg1val); + if ((Pointer) arg2val != (Pointer) value2) + pfree(arg2val); + } + } + else if (typLen == -2) + { + char *s1, + *s2; + + /* Compare cstring datums */ + s1 = DatumGetCString(value1); + s2 = DatumGetCString(value2); + len1 = strlen(s1) + 1; + len2 = strlen(s2) + 1; + if (len1 != len2) + return false; + result = (memcmp(s1, s2, len1) == 0); + } + else + elog(ERROR, "unexpected typLen: %d", typLen); + + return result; +} + +/*------------------------------------------------------------------------- + * datum_image_hash -- Use datum_image_attr_hash instead * * Generate a hash value based on the binary representation of 'value'. Most * use cases will want to use the hash function specific to the Datum's type, @@ -376,6 +462,71 @@ datum_image_hash(Datum value, bool typByVal, int typLen) return result; } +/*------------------------------------------------------------------------- + * datum_image_attr_hash + * + * Generate a hash value based on the binary representation of 'value'. Most + * use cases will want to use the hash function specific to the Datum's type, + * however, some corner cases require generating a hash value based on the + * actual bits rather than the logical value. + *------------------------------------------------------------------------- + */ +uint32 +datum_image_attr_hash(Datum value, FormData_pg_attribute *atttype) +{ + bool typByVal = atttype->attbyval; + int16 typLen; + uint32 result; + Size len; + + /* + * Always special case names to compare as cstrings instead of comparing + * NAMEDATALEN bytes. We must do this due to btree name_ops using an + * underlying storage datatype of cstring. If the Datum comes from an + * index only scan, then the Datum won't be padded out with NULs for the + * full NAMEDATALEN bytes. + */ + if (atttype->atttypid == NAMEOID) + typLen = -2; + else + typLen = atttype->attlen; + + if (typByVal) + result = hash_bytes((unsigned char *) &value, sizeof(Datum)); + else if (typLen > 0) + result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); + else if (typLen == -1) + { + struct varlena *val; + + len = toast_raw_datum_size(value); + + val = PG_DETOAST_DATUM_PACKED(value); + + result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); + + /* Only free memory if it's a copy made here. */ + if ((Pointer) val != (Pointer) value) + pfree(val); + } + else if (typLen == -2) + { + char *s; + + s = DatumGetCString(value); + len = strlen(s) + 1; + + result = hash_bytes((unsigned char *) s, len); + } + else + { + elog(ERROR, "unexpected typLen: %d", typLen); + result = 0; /* keep compiler quiet */ + } + + return result; +} + /*------------------------------------------------------------------------- * btequalimage * diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 375b17b9f3..82b1cbcb9e 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -2838,7 +2838,7 @@ ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, */ Form_pg_attribute att = TupleDescAttr(oldslot->tts_tupleDescriptor, attnums[i] - 1); - if (!datum_image_eq(oldvalue, newvalue, att->attbyval, att->attlen)) + if (!datum_image_attr_eq(oldvalue, newvalue, att)) return false; } else diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index ad176651d8..11c4460fed 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -1723,7 +1723,7 @@ record_image_eq(PG_FUNCTION_ARGS) } /* Compare the pair of elements */ - result = datum_image_eq(values1[i1], values2[i2], att1->attbyval, att2->attlen); + result = datum_image_attr_eq(values1[i1], values2[i2], att1); if (!result) break; } diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h index 70a47f33c1..046e789f7c 100644 --- a/src/include/utils/datum.h +++ b/src/include/utils/datum.h @@ -18,6 +18,8 @@ #ifndef DATUM_H #define DATUM_H +struct FormData_pg_attribute; + /* * datumGetSize - find the "real" length of a datum */ @@ -47,7 +49,7 @@ extern bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen); /* - * datum_image_eq + * datum_image_eq -- Use datum_image_attr_eq instead * * Compares two datums for identical contents, based on byte images. Return * true if the two datums are equal, false otherwise. @@ -56,13 +58,31 @@ extern bool datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen); /* - * datum_image_hash + * datum_image_attr_eq + * + * Compares two datums for identical contents, based on byte images.Return + * true if the two datums are equal, false otherwise. + */ +extern bool datum_image_attr_eq(Datum value1, Datum value2, + struct FormData_pg_attribute *atttype); + +/* + * datum_image_hash -- Use datum_image_attr_hash instead * * Generates hash value for 'value' based on its bits rather than logical * value. */ extern uint32 datum_image_hash(Datum value, bool typByVal, int typLen); +/* + * datum_image_attr_hash + * + * Generates hash value for 'value' based on its bits rather than logical + * value. + */ +extern uint32 datum_image_attr_hash(Datum value, + struct FormData_pg_attribute *atttype); + /* * Serialize and restore datums so that we can transfer them to parallel * workers. -- 2.37.2