COLLATE: Hash partition vs UPDATE
Hi,
The following case
-- test.sql --
CREATE TABLE test (a text PRIMARY KEY, b text) PARTITION BY HASH (a);
CREATE TABLE test_p0 PARTITION OF test FOR VALUES WITH (MODULUS 2,
REMAINDER 0);
CREATE TABLE test_p1 PARTITION OF test FOR VALUES WITH (MODULUS 2,
REMAINDER 1);
-- CREATE INDEX idx_test_b ON test USING HASH (b);
INSERT INTO test VALUES ('aaaa', 'aaaa');
-- Regression
UPDATE test SET b = 'bbbb' WHERE a = 'aaaa';
-- test.sql --
fails on master, which includes [1]https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=5e1963fb764e9cc092e0f7b58b28985c311431d9, with
psql:test.sql:9: ERROR: could not determine which collation to use for
string hashing
HINT: Use the COLLATE clause to set the collation explicitly.
It passes on 11.x.
I'll add it to the open items list.
[1]: https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=5e1963fb764e9cc092e0f7b58b28985c311431d9
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=5e1963fb764e9cc092e0f7b58b28985c311431d9
Best regards,
Jesper
Hi Jesper,
On 2019/04/09 1:33, Jesper Pedersen wrote:
Hi,
The following case
-- test.sql --
CREATE TABLE test (a text PRIMARY KEY, b text) PARTITION BY HASH (a);
CREATE TABLE test_p0 PARTITION OF test FOR VALUES WITH (MODULUS 2,
REMAINDER 0);
CREATE TABLE test_p1 PARTITION OF test FOR VALUES WITH (MODULUS 2,
REMAINDER 1);
-- CREATE INDEX idx_test_b ON test USING HASH (b);INSERT INTO test VALUES ('aaaa', 'aaaa');
-- Regression
UPDATE test SET b = 'bbbb' WHERE a = 'aaaa';
-- test.sql --fails on master, which includes [1], with
psql:test.sql:9: ERROR: could not determine which collation to use for
string hashing
HINT: Use the COLLATE clause to set the collation explicitly.It passes on 11.x.
Thanks for the report.
This seems to broken since the following commit (I see you already cc'd
Peter):
commit 5e1963fb764e9cc092e0f7b58b28985c311431d9
Author: Peter Eisentraut <peter@eisentraut.org>
Date: Fri Mar 22 12:09:32 2019 +0100
Collations with nondeterministic comparison
As of this commit, hashing functions hashtext() and hashtextextended()
require a valid collation to be passed in. ISTM,
satisfies_hash_partition() that's called by hash partition constraint
checking should have been changed to use FunctionCall2Coll() interface to
account for the requirements of the above commit. I see that it did that
for compute_partition_hash_value(), which is used by hash partition tuple
routing. That also seems to be covered by regression tests, but there are
no tests that cover satisfies_hash_partition().
Attached patch is an attempt to fix this. I've also added Amul Sul who
can maybe comment on the satisfies_hash_partition() changes.
BTW, it seems we don't need to back-patch this to PG 11 which introduced
hash partitioning, because text hashing functions don't need collation
there, right?
Thanks,
Amit
Attachments:
satisfies_hash_partition-collate-1.patchtext/plain; charset=UTF-8; name=satisfies_hash_partition-collate-1.patchDownload
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index c8770ccfee..331dab3954 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2778,6 +2778,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
bool variadic_typbyval;
char variadic_typalign;
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
+ Oid partcollid[PARTITION_MAX_KEYS];
} ColumnsHashData;
Oid parentId;
int modulus;
@@ -2865,6 +2866,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
fmgr_info_copy(&my_extra->partsupfunc[j],
&key->partsupfunc[j],
fcinfo->flinfo->fn_mcxt);
+ my_extra->partcollid[j] = key->partcollation[j];
}
}
@@ -2888,6 +2890,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
/* check argument types */
for (j = 0; j < key->partnatts; ++j)
+ {
if (key->parttypid[j] != my_extra->variadic_type)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -2895,6 +2898,8 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
j + 1,
format_type_be(key->parttypid[j]),
format_type_be(my_extra->variadic_type))));
+ my_extra->partcollid[j] = key->partcollation[j];
+ }
fmgr_info_copy(&my_extra->partsupfunc[0],
&key->partsupfunc[0],
@@ -2928,9 +2933,10 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
- hash = FunctionCall2(&my_extra->partsupfunc[i],
- PG_GETARG_DATUM(argno),
- seed);
+ hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
+ my_extra->partcollid[i],
+ PG_GETARG_DATUM(argno),
+ seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
diff --git a/src/test/regress/expected/hash_part.out b/src/test/regress/expected/hash_part.out
index 731d26fc3d..b536b6955c 100644
--- a/src/test/regress/expected/hash_part.out
+++ b/src/test/regress/expected/hash_part.out
@@ -99,6 +99,20 @@ ERROR: number of partitioning columns (2) does not match number of partition ke
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
ERROR: column 1 of the partition key has type "integer", but supplied value is of type "timestamp with time zone"
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong at least
+-- one of the partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+ satisfies
+-----------
+ t
+(1 row)
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
diff --git a/src/test/regress/sql/hash_part.sql b/src/test/regress/sql/hash_part.sql
index f457ac344c..30601b913e 100644
--- a/src/test/regress/sql/hash_part.sql
+++ b/src/test/regress/sql/hash_part.sql
@@ -75,6 +75,16 @@ SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong to
+-- one of the two defined partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
Hi Amit,
On 4/8/19 11:18 PM, Amit Langote wrote:
As of this commit, hashing functions hashtext() and hashtextextended()
require a valid collation to be passed in. ISTM,
satisfies_hash_partition() that's called by hash partition constraint
checking should have been changed to use FunctionCall2Coll() interface to
account for the requirements of the above commit. I see that it did that
for compute_partition_hash_value(), which is used by hash partition tuple
routing. That also seems to be covered by regression tests, but there are
no tests that cover satisfies_hash_partition().Attached patch is an attempt to fix this. I've also added Amul Sul who
can maybe comment on the satisfies_hash_partition() changes.
Yeah, that works here - apart from an issue with the test case; fixed in
the attached.
Best regards,
Jesper
Attachments:
satisfies_hash_partition-collate-2.patchtext/x-patch; name=satisfies_hash_partition-collate-2.patchDownload
From 1902dcf1fd31c95fd95e1231630b3c446857cd59 Mon Sep 17 00:00:00 2001
From: jesperpedersen <jesper.pedersen@redhat.com>
Date: Tue, 9 Apr 2019 07:35:28 -0400
Subject: [PATCH] 0001
---
src/backend/partitioning/partbounds.c | 12 +++++++++---
src/test/regress/expected/hash_part.out | 14 ++++++++++++++
src/test/regress/sql/hash_part.sql | 10 ++++++++++
3 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index c8770ccfee..331dab3954 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2778,6 +2778,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
bool variadic_typbyval;
char variadic_typalign;
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
+ Oid partcollid[PARTITION_MAX_KEYS];
} ColumnsHashData;
Oid parentId;
int modulus;
@@ -2865,6 +2866,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
fmgr_info_copy(&my_extra->partsupfunc[j],
&key->partsupfunc[j],
fcinfo->flinfo->fn_mcxt);
+ my_extra->partcollid[j] = key->partcollation[j];
}
}
@@ -2888,6 +2890,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
/* check argument types */
for (j = 0; j < key->partnatts; ++j)
+ {
if (key->parttypid[j] != my_extra->variadic_type)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -2895,6 +2898,8 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
j + 1,
format_type_be(key->parttypid[j]),
format_type_be(my_extra->variadic_type))));
+ my_extra->partcollid[j] = key->partcollation[j];
+ }
fmgr_info_copy(&my_extra->partsupfunc[0],
&key->partsupfunc[0],
@@ -2928,9 +2933,10 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
- hash = FunctionCall2(&my_extra->partsupfunc[i],
- PG_GETARG_DATUM(argno),
- seed);
+ hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
+ my_extra->partcollid[i],
+ PG_GETARG_DATUM(argno),
+ seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
diff --git a/src/test/regress/expected/hash_part.out b/src/test/regress/expected/hash_part.out
index 731d26fc3d..adb4410b4b 100644
--- a/src/test/regress/expected/hash_part.out
+++ b/src/test/regress/expected/hash_part.out
@@ -99,6 +99,20 @@ ERROR: number of partitioning columns (2) does not match number of partition ke
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
ERROR: column 1 of the partition key has type "integer", but supplied value is of type "timestamp with time zone"
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong to at least
+-- one of the two defined partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+ satisfies
+-----------
+ t
+(1 row)
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
diff --git a/src/test/regress/sql/hash_part.sql b/src/test/regress/sql/hash_part.sql
index f457ac344c..8cd9e8a8a2 100644
--- a/src/test/regress/sql/hash_part.sql
+++ b/src/test/regress/sql/hash_part.sql
@@ -75,6 +75,16 @@ SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong to at least
+-- one of the two defined partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
--
2.17.2
On Tue, Apr 9, 2019 at 9:44 PM Jesper Pedersen
<jesper.pedersen@redhat.com> wrote:
Hi Amit,
On 4/8/19 11:18 PM, Amit Langote wrote:
As of this commit, hashing functions hashtext() and hashtextextended()
require a valid collation to be passed in. ISTM,
satisfies_hash_partition() that's called by hash partition constraint
checking should have been changed to use FunctionCall2Coll() interface to
account for the requirements of the above commit. I see that it did that
for compute_partition_hash_value(), which is used by hash partition tuple
routing. That also seems to be covered by regression tests, but there are
no tests that cover satisfies_hash_partition().Attached patch is an attempt to fix this. I've also added Amul Sul who
can maybe comment on the satisfies_hash_partition() changes.Yeah, that works here - apart from an issue with the test case; fixed in
the attached.
Ah, crap. Last minute changes are bad.
Thanks for fixing.
Thanks,
Amit
Jesper Pedersen <jesper.pedersen@redhat.com> writes:
Yeah, that works here - apart from an issue with the test case; fixed in
the attached.
Couple issues spotted in an eyeball review of that:
* There is code that supposes that partsupfunc[] is the last
field of ColumnsHashData, eg
fcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo) * nargs);
I'm a bit surprised that this patch manages to run without crashing,
because this would certainly not allocate space for partcollid[].
I think we would likely be well advised to do
- FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
+ FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
to make it more obvious that that has to be the last field. Or else
drop the cuteness with variable-size allocations of ColumnsHashData.
FmgrInfo is only 48 bytes, I'm not really sure that it's worth the
risk of bugs to "optimize" this.
* I see collation-less calls of the partsupfunc at both partbounds.c:2931
and partbounds.c:2970, but this patch touches only the first one. How
can that be right?
regards, tom lane
Thanks for the review.
On 2019/04/15 5:50, Tom Lane wrote:
Jesper Pedersen <jesper.pedersen@redhat.com> writes:
Yeah, that works here - apart from an issue with the test case; fixed in
the attached.Couple issues spotted in an eyeball review of that:
* There is code that supposes that partsupfunc[] is the last
field of ColumnsHashData, egfcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo) * nargs);I'm a bit surprised that this patch manages to run without crashing,
because this would certainly not allocate space for partcollid[].I think we would likely be well advised to do
- FmgrInfo partsupfunc[PARTITION_MAX_KEYS]; + FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
I went with this:
- FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
Oid partcollid[PARTITION_MAX_KEYS];
+ FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
to make it more obvious that that has to be the last field. Or else
drop the cuteness with variable-size allocations of ColumnsHashData.
FmgrInfo is only 48 bytes, I'm not really sure that it's worth the
risk of bugs to "optimize" this.
I wonder if workloads on hash partitioned tables that require calling
satisfies_hash_partition repeatedly may not be as common as thought when
writing this code? The only case I see where it's being repeatedly called
is bulk inserts into a hash-partitioned table, that too, only if BR
triggers on partitions necessitate rechecking the partition constraint.
* I see collation-less calls of the partsupfunc at both partbounds.c:2931
and partbounds.c:2970, but this patch touches only the first one. How
can that be right?
Oops, that's wrong.
Attached updated patch.
Thanks,
Amit
Attachments:
satisfies_hash_partition-collate-3.patchtext/plain; charset=UTF-8; name=satisfies_hash_partition-collate-3.patchDownload
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index c8770ccfee..460f023134 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -2777,7 +2777,8 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
int16 variadic_typlen;
bool variadic_typbyval;
char variadic_typalign;
- FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
+ Oid partcollid[PARTITION_MAX_KEYS];
+ FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
} ColumnsHashData;
Oid parentId;
int modulus;
@@ -2851,6 +2852,8 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
my_extra->relid = parentId;
my_extra->nkeys = key->partnatts;
+ memcpy(my_extra->partcollid, key->partcollation,
+ key->partnatts * sizeof(Oid));
/* check argument types and save fmgr_infos */
for (j = 0; j < key->partnatts; ++j)
{
@@ -2885,6 +2888,7 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
&my_extra->variadic_typlen,
&my_extra->variadic_typbyval,
&my_extra->variadic_typalign);
+ my_extra->partcollid[0] = key->partcollation[0];
/* check argument types */
for (j = 0; j < key->partnatts; ++j)
@@ -2928,9 +2932,10 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
- hash = FunctionCall2(&my_extra->partsupfunc[i],
- PG_GETARG_DATUM(argno),
- seed);
+ hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
+ my_extra->partcollid[i],
+ PG_GETARG_DATUM(argno),
+ seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
@@ -2967,9 +2972,10 @@ satisfies_hash_partition(PG_FUNCTION_ARGS)
Assert(OidIsValid(my_extra->partsupfunc[0].fn_oid));
- hash = FunctionCall2(&my_extra->partsupfunc[0],
- datum[i],
- seed);
+ hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
+ my_extra->partcollid[0],
+ datum[i],
+ seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
diff --git a/src/test/regress/expected/hash_part.out b/src/test/regress/expected/hash_part.out
index 731d26fc3d..8db316be27 100644
--- a/src/test/regress/expected/hash_part.out
+++ b/src/test/regress/expected/hash_part.out
@@ -99,6 +99,20 @@ ERROR: number of partitioning columns (2) does not match number of partition ke
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
ERROR: column 1 of the partition key has type "integer", but supplied value is of type "timestamp with time zone"
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong to
+-- one of the two defined partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+ satisfies
+-----------
+ t
+(1 row)
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
diff --git a/src/test/regress/sql/hash_part.sql b/src/test/regress/sql/hash_part.sql
index f457ac344c..30601b913e 100644
--- a/src/test/regress/sql/hash_part.sql
+++ b/src/test/regress/sql/hash_part.sql
@@ -75,6 +75,16 @@ SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
SELECT satisfies_hash_partition('mcinthash'::regclass, 4, 0,
variadic array[now(), now()]);
+-- check satisfies_hash_partition passes correct collation
+create table text_hashp (a text) partition by hash (a);
+create table text_hashp0 partition of text_hashp for values with (modulus 2, remainder 0);
+create table text_hashp1 partition of text_hashp for values with (modulus 2, remainder 1);
+-- The result here should always be true, because 'xxx' must belong to
+-- one of the two defined partitions
+select satisfies_hash_partition('text_hashp'::regclass, 2, 0, 'xxx'::text) OR
+ satisfies_hash_partition('text_hashp'::regclass, 2, 1, 'xxx'::text) AS satisfies;
+
-- cleanup
DROP TABLE mchash;
DROP TABLE mcinthash;
+DROP TABLE text_hashp;
Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> writes:
Attached updated patch.
LGTM, pushed.
regards, tom lane