Hash support for row types

Started by Peter Eisentraut · about 5 years ago · 10 messages
#1Peter Eisentraut
peter.eisentraut@2ndquadrant.com
1 attachment(s)

In [0] it was discussed that hash support for row types/record would be
handy. So I implemented that.

The implementation hashes each field and combines the hash values. Most
of the code structure can be borrowed from the record comparison
functions/btree support. To combine the hash values, I adapted the code
from the array hashing functions. (The hash_combine()/hash_combine64()
functions also looked sensible, but they don't appear to work in a way
that satisfies the hash_func regression test. Could be documented better.)

The main motivation is to support UNION [DISTINCT] as discussed in [0],
but this also enables other hash-related functionality such as hash
joins (as one regression test accidentally revealed) and hash partitioning.

[0]: /messages/by-id/52beaf44-ccc3-0ba1-45c7-74aa251cd6ab@2ndquadrant.com

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

v1-0001-Hash-support-for-row-types.patch — text/plain; charset=UTF-8 — Download
From 3c9269d60abc5033804802713d9a10f72e77f3e0 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Mon, 19 Oct 2020 09:44:15 +0200
Subject: [PATCH v1] Hash support for row types

Add hash functions for the record type as well as a hash operator
family and operator class for the record type.  This enables all the
hash functionality for the record type such as UNION DISTINCT, hash
joins, and hash partitioning.
---
 doc/src/sgml/queries.sgml               |   9 -
 src/backend/utils/adt/rowtypes.c        | 249 ++++++++++++++++++++++++
 src/include/catalog/pg_amop.dat         |   5 +
 src/include/catalog/pg_amproc.dat       |   4 +
 src/include/catalog/pg_opclass.dat      |   2 +
 src/include/catalog/pg_operator.dat     |   2 +-
 src/include/catalog/pg_opfamily.dat     |   2 +
 src/include/catalog/pg_proc.dat         |   7 +
 src/test/regress/expected/hash_func.out |  12 ++
 src/test/regress/expected/join.out      |   1 +
 src/test/regress/expected/with.out      |  38 ++++
 src/test/regress/sql/hash_func.sql      |   9 +
 src/test/regress/sql/join.sql           |   1 +
 src/test/regress/sql/with.sql           |  10 +
 14 files changed, 341 insertions(+), 10 deletions(-)

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index f06afe2c3f..8e70e57b72 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -2182,15 +2182,6 @@ <title>Search Order</title>
 </programlisting>
    </para>
 
-   <note>
-    <para>
-     The queries shown in this and the following section involving
-     <literal>ROW</literal> constructors in the target list only support
-     <literal>UNION ALL</literal> (not plain <literal>UNION</literal>) in the
-     current implementation.
-    </para>
-   </note>
-
    <tip>
     <para>
      Omit the <literal>ROW()</literal> syntax in the common case where only one
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index 674cf0a55d..5c86259929 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -19,6 +19,7 @@
 #include "access/detoast.h"
 #include "access/htup_details.h"
 #include "catalog/pg_type.h"
+#include "common/hashfn.h"
 #include "funcapi.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
@@ -1766,3 +1767,251 @@ btrecordimagecmp(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(record_image_cmp(fcinfo));
 }
+
+
+/*
+ * Row type hash functions
+ */
+
+Datum
+record_hash(PG_FUNCTION_ARGS)
+{
+	HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+	uint32		result = 0;
+	Oid			tupType;
+	int32		tupTypmod;
+	TupleDesc	tupdesc;
+	HeapTupleData tuple;
+	int			ncolumns;
+	RecordCompareData *my_extra;
+	Datum	   *values;
+	bool	   *nulls;
+
+	check_stack_depth();		/* recurses for record-type columns */
+
+	/* Extract type info from tuple */
+	tupType = HeapTupleHeaderGetTypeId(record);
+	tupTypmod = HeapTupleHeaderGetTypMod(record);
+	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+	ncolumns = tupdesc->natts;
+
+	/* Build temporary HeapTuple control structure */
+	tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+	ItemPointerSetInvalid(&(tuple.t_self));
+	tuple.t_tableOid = InvalidOid;
+	tuple.t_data = record;
+
+	/*
+	 * We arrange to look up the needed hashing info just once per series
+	 * of calls, assuming the record type doesn't change underneath us.
+	 */
+	my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+	if (my_extra == NULL ||
+		my_extra->ncolumns < ncolumns)
+	{
+		fcinfo->flinfo->fn_extra =
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   offsetof(RecordCompareData, columns) +
+							   ncolumns * sizeof(ColumnCompareData));
+		my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+		my_extra->ncolumns = ncolumns;
+		my_extra->record1_type = InvalidOid;
+		my_extra->record1_typmod = 0;
+	}
+
+	if (my_extra->record1_type != tupType ||
+		my_extra->record1_typmod != tupTypmod)
+	{
+		MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+		my_extra->record1_type = tupType;
+		my_extra->record1_typmod = tupTypmod;
+	}
+
+	/* Break down the tuple into fields */
+	values = (Datum *) palloc(ncolumns * sizeof(Datum));
+	nulls = (bool *) palloc(ncolumns * sizeof(bool));
+	heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+	for (int i = 0; i < ncolumns; i++)
+	{
+		Form_pg_attribute att;
+		TypeCacheEntry *typentry;
+		uint32		element_hash;
+
+		att = TupleDescAttr(tupdesc, i);
+
+		if (att->attisdropped)
+			continue;
+
+		/*
+		 * Lookup the hash function if not done already
+		 */
+		typentry = my_extra->columns[i].typentry;
+		if (typentry == NULL ||
+			typentry->type_id != att->atttypid)
+		{
+			typentry = lookup_type_cache(att->atttypid,
+										 TYPECACHE_HASH_PROC_FINFO);
+			if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify a hash function for type %s",
+								format_type_be(typentry->type_id))));
+			my_extra->columns[i].typentry = typentry;
+		}
+
+		/* Compute hash of element */
+		if (nulls[i])
+		{
+			element_hash = 0;
+		}
+		else
+		{
+			LOCAL_FCINFO(locfcinfo, 1);
+
+			InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
+									 att->attcollation, NULL, NULL);
+			locfcinfo->args[0].value = values[i];
+			locfcinfo->args[0].isnull = false;
+			element_hash = DatumGetUInt32(FunctionCallInvoke(locfcinfo));
+
+			/* We don't expect hash support functions to return null */
+			Assert(!locfcinfo->isnull);
+		}
+
+		/* see hash_array() */
+		result = (result << 5) - result + element_hash;
+	}
+
+	pfree(values);
+	pfree(nulls);
+	ReleaseTupleDesc(tupdesc);
+
+	/* Avoid leaking memory when handed toasted input. */
+	PG_FREE_IF_COPY(record, 0);
+
+	PG_RETURN_UINT32(result);
+}
+
+Datum
+record_hash_extended(PG_FUNCTION_ARGS)
+{
+	HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	uint64		result = 0;
+	Oid			tupType;
+	int32		tupTypmod;
+	TupleDesc	tupdesc;
+	HeapTupleData tuple;
+	int			ncolumns;
+	RecordCompareData *my_extra;
+	Datum	   *values;
+	bool	   *nulls;
+
+	check_stack_depth();		/* recurses for record-type columns */
+
+	/* Extract type info from tuple */
+	tupType = HeapTupleHeaderGetTypeId(record);
+	tupTypmod = HeapTupleHeaderGetTypMod(record);
+	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+	ncolumns = tupdesc->natts;
+
+	/* Build temporary HeapTuple control structure */
+	tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+	ItemPointerSetInvalid(&(tuple.t_self));
+	tuple.t_tableOid = InvalidOid;
+	tuple.t_data = record;
+
+	/*
+	 * We arrange to look up the needed hashing info just once per series
+	 * of calls, assuming the record type doesn't change underneath us.
+	 */
+	my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+	if (my_extra == NULL ||
+		my_extra->ncolumns < ncolumns)
+	{
+		fcinfo->flinfo->fn_extra =
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   offsetof(RecordCompareData, columns) +
+							   ncolumns * sizeof(ColumnCompareData));
+		my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+		my_extra->ncolumns = ncolumns;
+		my_extra->record1_type = InvalidOid;
+		my_extra->record1_typmod = 0;
+	}
+
+	if (my_extra->record1_type != tupType ||
+		my_extra->record1_typmod != tupTypmod)
+	{
+		MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+		my_extra->record1_type = tupType;
+		my_extra->record1_typmod = tupTypmod;
+	}
+
+	/* Break down the tuple into fields */
+	values = (Datum *) palloc(ncolumns * sizeof(Datum));
+	nulls = (bool *) palloc(ncolumns * sizeof(bool));
+	heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+	for (int i = 0; i < ncolumns; i++)
+	{
+		Form_pg_attribute att;
+		TypeCacheEntry *typentry;
+		uint64		element_hash;
+
+		att = TupleDescAttr(tupdesc, i);
+
+		if (att->attisdropped)
+			continue;
+
+		/*
+		 * Lookup the hash function if not done already
+		 */
+		typentry = my_extra->columns[i].typentry;
+		if (typentry == NULL ||
+			typentry->type_id != att->atttypid)
+		{
+			typentry = lookup_type_cache(att->atttypid,
+										 TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+			if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify a hash function for type %s",
+								format_type_be(typentry->type_id))));
+			my_extra->columns[i].typentry = typentry;
+		}
+
+		/* Compute hash of element */
+		if (nulls[i])
+		{
+			element_hash = 0;
+		}
+		else
+		{
+			LOCAL_FCINFO(locfcinfo, 2);
+
+			InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2,
+									 att->attcollation, NULL, NULL);
+			locfcinfo->args[0].value = values[i];
+			locfcinfo->args[0].isnull = false;
+			locfcinfo->args[1].value = Int64GetDatum(seed);
+			locfcinfo->args[1].isnull = false;
+			element_hash = DatumGetUInt64(FunctionCallInvoke(locfcinfo));
+
+			/* We don't expect hash support functions to return null */
+			Assert(!locfcinfo->isnull);
+		}
+
+		/* see hash_array_extended() */
+		result = (result << 5) - result + element_hash;
+	}
+
+	pfree(values);
+	pfree(nulls);
+	ReleaseTupleDesc(tupdesc);
+
+	/* Avoid leaking memory when handed toasted input. */
+	PG_FREE_IF_COPY(record, 0);
+
+	PG_RETURN_UINT64(result);
+}
diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat
index 1dfb6fd373..dccbeabcf4 100644
--- a/src/include/catalog/pg_amop.dat
+++ b/src/include/catalog/pg_amop.dat
@@ -979,6 +979,11 @@
   amoprighttype => 'oidvector', amopstrategy => '1',
   amopopr => '=(oidvector,oidvector)', amopmethod => 'hash' },
 
+# record_ops
+{ amopfamily => 'hash/record_ops', amoplefttype => 'record',
+  amoprighttype => 'record', amopstrategy => '1',
+  amopopr => '=(record,record)', amopmethod => 'hash' },
+
 # text_ops
 { amopfamily => 'hash/text_ops', amoplefttype => 'text',
   amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index a8e0c4ff8a..e000421788 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -433,6 +433,10 @@
   amprocrighttype => 'uuid', amprocnum => '1', amproc => 'uuid_hash' },
 { amprocfamily => 'hash/uuid_ops', amproclefttype => 'uuid',
   amprocrighttype => 'uuid', amprocnum => '2', amproc => 'uuid_hash_extended' },
+{ amprocfamily => 'hash/record_ops', amproclefttype => 'record',
+  amprocrighttype => 'record', amprocnum => '1', amproc => 'record_hash' },
+{ amprocfamily => 'hash/record_ops', amproclefttype => 'record',
+  amprocrighttype => 'record', amprocnum => '2', amproc => 'record_hash_extended' },
 { amprocfamily => 'hash/pg_lsn_ops', amproclefttype => 'pg_lsn',
   amprocrighttype => 'pg_lsn', amprocnum => '1', amproc => 'pg_lsn_hash' },
 { amprocfamily => 'hash/pg_lsn_ops', amproclefttype => 'pg_lsn',
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat
index f2342bb328..be5712692f 100644
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -114,6 +114,8 @@
   opcfamily => 'hash/oidvector_ops', opcintype => 'oidvector' },
 { opcmethod => 'btree', opcname => 'record_ops',
   opcfamily => 'btree/record_ops', opcintype => 'record' },
+{ opcmethod => 'hash', opcname => 'record_ops',
+  opcfamily => 'hash/record_ops', opcintype => 'record' },
 { opcmethod => 'btree', opcname => 'record_image_ops',
   opcfamily => 'btree/record_image_ops', opcintype => 'record',
   opcdefault => 'f' },
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 7cc812adda..1ffd826679 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3064,7 +3064,7 @@
 
 # generic record comparison operators
 { oid => '2988', oid_symbol => 'RECORD_EQ_OP', descr => 'equal',
-  oprname => '=', oprcanmerge => 't', oprleft => 'record', oprright => 'record',
+  oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'record', oprright => 'record',
   oprresult => 'bool', oprcom => '=(record,record)',
   oprnegate => '<>(record,record)', oprcode => 'record_eq', oprrest => 'eqsel',
   oprjoin => 'eqjoinsel' },
diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat
index cf0fb325b3..11c7ad2c14 100644
--- a/src/include/catalog/pg_opfamily.dat
+++ b/src/include/catalog/pg_opfamily.dat
@@ -76,6 +76,8 @@
   opfmethod => 'hash', opfname => 'oidvector_ops' },
 { oid => '2994',
   opfmethod => 'btree', opfname => 'record_ops' },
+{ oid => '9611',
+  opfmethod => 'hash', opfname => 'record_ops' },
 { oid => '3194',
   opfmethod => 'btree', opfname => 'record_image_ops' },
 { oid => '1994', oid_symbol => 'TEXT_BTREE_FAM_OID',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 22340baf1c..1d113272fd 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9670,6 +9670,13 @@
   proname => 'btrecordcmp', prorettype => 'int4',
   proargtypes => 'record record', prosrc => 'btrecordcmp' },
 
+{ oid => '9609', descr => 'hash',
+  proname => 'record_hash', prorettype => 'int4', proargtypes => 'record',
+  prosrc => 'record_hash' },
+{ oid => '9610', descr => 'hash',
+  proname => 'record_hash_extended', prorettype => 'int8', proargtypes => 'record int8',
+  prosrc => 'record_hash_extended' },
+
 # record comparison using raw byte images
 { oid => '3181',
   proname => 'record_image_eq', prorettype => 'bool',
diff --git a/src/test/regress/expected/hash_func.out b/src/test/regress/expected/hash_func.out
index e6e3410aaa..9c11e16b0b 100644
--- a/src/test/regress/expected/hash_func.out
+++ b/src/test/regress/expected/hash_func.out
@@ -298,3 +298,15 @@ WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
 -------+----------+-----------+-----------
 (0 rows)
 
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, record_hash(v)::bit(32) as standard,
+       record_hash_extended(v, 0)::bit(32) as extended0,
+       record_hash_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1, row(2, 'bbb'), row(-1, 'ccc'))) x(v)
+WHERE  record_hash(v)::bit(32) != record_hash_extended(v, 0)::bit(32)
+       OR record_hash(v)::bit(32) = record_hash_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1 
+-------+----------+-----------+-----------
+(0 rows)
+
+DROP TYPE t1;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..4a375deff3 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2707,6 +2707,7 @@ select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 (5 rows)
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
                      QUERY PLAN                     
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 457f3bf04f..6a01ddfdd6 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -616,6 +616,44 @@ select * from search_graph;
  2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
 (25 rows)
 
+-- again with union distinct to test row-type hash support
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+ f | t |   label    | is_cycle |                   path                    
+---+---+------------+----------+-------------------------------------------
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(2,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(1,4)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(1,4)","(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | t        | {"(1,4)","(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | t        | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | t        | {"(5,1)","(1,4)","(4,5)","(5,1)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
+(25 rows)
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
diff --git a/src/test/regress/sql/hash_func.sql b/src/test/regress/sql/hash_func.sql
index a3e2decc2c..747c79b915 100644
--- a/src/test/regress/sql/hash_func.sql
+++ b/src/test/regress/sql/hash_func.sql
@@ -220,3 +220,12 @@ CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
         (int4range(550274, 1550274)), (int4range(1550275, 208112489))) x(v)
 WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
        OR hash_range(v)::bit(32) = hash_range_extended(v, 1)::bit(32);
+
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, record_hash(v)::bit(32) as standard,
+       record_hash_extended(v, 0)::bit(32) as extended0,
+       record_hash_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1, row(2, 'bbb'), row(-1, 'ccc'))) x(v)
+WHERE  record_hash(v)::bit(32) != record_hash_extended(v, 0)::bit(32)
+       OR record_hash(v)::bit(32) = record_hash_extended(v, 1)::bit(32);
+DROP TYPE t1;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1403e0ffe7..023290bd52 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -700,6 +700,7 @@ CREATE TEMP TABLE tt2 ( tt2_id int4, joincol int4 );
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 2eea297a71..7aa164b997 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -317,6 +317,16 @@ CREATE TEMPORARY TABLE tree(
 )
 select * from search_graph;
 
+-- again with union distinct to test row-type hash support
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
-- 
2.28.0

#2Andres Freund
andres@anarazel.de
In reply to: Peter Eisentraut (#1)
Re: Hash support for row types

Hi,

On 2020-10-19 10:01:14 +0200, Peter Eisentraut wrote:

In [0] it was discussed that hash support for row types/record would be
handy. So I implemented that.

The implementation hashes each field and combines the hash values. Most of
the code structure can be borrowed from the record comparison
functions/btree support. To combine the hash values, I adapted the code
from the array hashing functions. (The hash_combine()/hash_combine64()
functions also looked sensible, but they don't appear to work in a way that
satisfies the hash_func regression test. Could be documented better.)

The main motivation is to support UNION [DISTINCT] as discussed in [0], but
this also enables other hash-related functionality such as hash joins (as
one regression test accidentally revealed) and hash partitioning.

How does this deal with row types with a field that doesn't have a hash
function? Erroring out at runtime could cause queries that used to
succeed, e.g. because all fields have btree ops, to fail, if we just have
a generic unconditionally present hash opclass? Is that an OK
"regression"?

Greetings,

Andres Freund

#3Peter Eisentraut
peter.eisentraut@2ndquadrant.com
In reply to: Andres Freund (#2)
Re: Hash support for row types

On 2020-10-20 01:32, Andres Freund wrote:

How does this deal with row types with a field that doesn't have a hash
function? Erroring out at runtime could cause queries that used to
succeed, e.g. because all fields have btree ops, to fail, if we just have
a generic unconditionally present hash opclass? Is that an OK
"regression"?

Good point. There is actually code in the type cache that is supposed
to handle that, so I'll need to adjust that.

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#4Robert Haas
robertmhaas@gmail.com
In reply to: Peter Eisentraut (#3)
Re: Hash support for row types

On Tue, Oct 20, 2020 at 11:10 AM Peter Eisentraut
<peter.eisentraut@2ndquadrant.com> wrote:

On 2020-10-20 01:32, Andres Freund wrote:

How does this deal with row types with a field that doesn't have a hash
function? Erroring out at runtime could cause queries that used to
succeed, e.g. because all fields have btree ops, to fail, if we just have
a generic unconditionally present hash opclass? Is that an OK
"regression"?

Good point. There is actually code in the type cache that is supposed
to handle that, so I'll need to adjust that.

Do we need to worry about what happens if somebody modifies the
opclass/opfamily definitions?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#4)
Re: Hash support for row types

Robert Haas <robertmhaas@gmail.com> writes:

Do we need to worry about what happens if somebody modifies the
opclass/opfamily definitions?

There's a lot of places that you can break by doing that. I'm not
too concerned about it.

regards, tom lane

#6Peter Eisentraut
peter.eisentraut@2ndquadrant.com
In reply to: Peter Eisentraut (#3)
1 attachment(s)
Re: Hash support for row types

On 2020-10-20 17:10, Peter Eisentraut wrote:

On 2020-10-20 01:32, Andres Freund wrote:

How does this deal with row types with a field that doesn't have a hash
function? Erroring out at runtime could cause queries that used to
succeed, e.g. because all fields have btree ops, to fail, if we just have
a generic unconditionally present hash opclass? Is that an OK
"regression"?

Good point. There is actually code in the type cache that is supposed
to handle that, so I'll need to adjust that.

Here is an updated patch with the type cache integration added.

To your point, this now checks each fields hashability before
considering a row type as hashable. It can still have run-time errors
for untyped record datums, but that's not something we can do anything
about.

--
Peter Eisentraut http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Attachments:

v2-0001-Hash-support-for-row-types.patch — text/plain; charset=UTF-8 — Download
From eda6f7182cfe8ac7317d6874317ace24c7c7d5f6 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Fri, 23 Oct 2020 09:33:41 +0200
Subject: [PATCH v2] Hash support for row types

Add hash functions for the record type as well as a hash operator
family and operator class for the record type.  This enables all the
hash functionality for the record type such as UNION DISTINCT, hash
joins, and hash partitioning.

Discussion: https://www.postgresql.org/message-id/flat/38eccd35-4e2d-6767-1b3c-dada1eac3124%402ndquadrant.com
---
 doc/src/sgml/queries.sgml               |   9 -
 src/backend/utils/adt/rowtypes.c        | 249 ++++++++++++++++++++++++
 src/backend/utils/cache/lsyscache.c     |   7 +-
 src/backend/utils/cache/typcache.c      |  78 +++++---
 src/include/catalog/pg_amop.dat         |   5 +
 src/include/catalog/pg_amproc.dat       |   4 +
 src/include/catalog/pg_opclass.dat      |   2 +
 src/include/catalog/pg_operator.dat     |   2 +-
 src/include/catalog/pg_opfamily.dat     |   2 +
 src/include/catalog/pg_proc.dat         |   7 +
 src/test/regress/expected/hash_func.out |  12 ++
 src/test/regress/expected/join.out      |   1 +
 src/test/regress/expected/with.out      |  38 ++++
 src/test/regress/sql/hash_func.sql      |   9 +
 src/test/regress/sql/join.sql           |   1 +
 src/test/regress/sql/with.sql           |  10 +
 16 files changed, 402 insertions(+), 34 deletions(-)

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index f06afe2c3f..8e70e57b72 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -2182,15 +2182,6 @@ <title>Search Order</title>
 </programlisting>
    </para>
 
-   <note>
-    <para>
-     The queries shown in this and the following section involving
-     <literal>ROW</literal> constructors in the target list only support
-     <literal>UNION ALL</literal> (not plain <literal>UNION</literal>) in the
-     current implementation.
-    </para>
-   </note>
-
    <tip>
     <para>
      Omit the <literal>ROW()</literal> syntax in the common case where only one
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index 674cf0a55d..28bbce128f 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -19,6 +19,7 @@
 #include "access/detoast.h"
 #include "access/htup_details.h"
 #include "catalog/pg_type.h"
+#include "common/hashfn.h"
 #include "funcapi.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
@@ -1766,3 +1767,251 @@ btrecordimagecmp(PG_FUNCTION_ARGS)
 {
 	PG_RETURN_INT32(record_image_cmp(fcinfo));
 }
+
+
+/*
+ * Row type hash functions
+ */
+
+Datum
+hash_record(PG_FUNCTION_ARGS)
+{
+	HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+	uint32		result = 0;
+	Oid			tupType;
+	int32		tupTypmod;
+	TupleDesc	tupdesc;
+	HeapTupleData tuple;
+	int			ncolumns;
+	RecordCompareData *my_extra;
+	Datum	   *values;
+	bool	   *nulls;
+
+	check_stack_depth();		/* recurses for record-type columns */
+
+	/* Extract type info from tuple */
+	tupType = HeapTupleHeaderGetTypeId(record);
+	tupTypmod = HeapTupleHeaderGetTypMod(record);
+	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+	ncolumns = tupdesc->natts;
+
+	/* Build temporary HeapTuple control structure */
+	tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+	ItemPointerSetInvalid(&(tuple.t_self));
+	tuple.t_tableOid = InvalidOid;
+	tuple.t_data = record;
+
+	/*
+	 * We arrange to look up the needed hashing info just once per series
+	 * of calls, assuming the record type doesn't change underneath us.
+	 */
+	my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+	if (my_extra == NULL ||
+		my_extra->ncolumns < ncolumns)
+	{
+		fcinfo->flinfo->fn_extra =
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   offsetof(RecordCompareData, columns) +
+							   ncolumns * sizeof(ColumnCompareData));
+		my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+		my_extra->ncolumns = ncolumns;
+		my_extra->record1_type = InvalidOid;
+		my_extra->record1_typmod = 0;
+	}
+
+	if (my_extra->record1_type != tupType ||
+		my_extra->record1_typmod != tupTypmod)
+	{
+		MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+		my_extra->record1_type = tupType;
+		my_extra->record1_typmod = tupTypmod;
+	}
+
+	/* Break down the tuple into fields */
+	values = (Datum *) palloc(ncolumns * sizeof(Datum));
+	nulls = (bool *) palloc(ncolumns * sizeof(bool));
+	heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+	for (int i = 0; i < ncolumns; i++)
+	{
+		Form_pg_attribute att;
+		TypeCacheEntry *typentry;
+		uint32		element_hash;
+
+		att = TupleDescAttr(tupdesc, i);
+
+		if (att->attisdropped)
+			continue;
+
+		/*
+		 * Lookup the hash function if not done already
+		 */
+		typentry = my_extra->columns[i].typentry;
+		if (typentry == NULL ||
+			typentry->type_id != att->atttypid)
+		{
+			typentry = lookup_type_cache(att->atttypid,
+										 TYPECACHE_HASH_PROC_FINFO);
+			if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify a hash function for type %s",
+								format_type_be(typentry->type_id))));
+			my_extra->columns[i].typentry = typentry;
+		}
+
+		/* Compute hash of element */
+		if (nulls[i])
+		{
+			element_hash = 0;
+		}
+		else
+		{
+			LOCAL_FCINFO(locfcinfo, 1);
+
+			InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
+									 att->attcollation, NULL, NULL);
+			locfcinfo->args[0].value = values[i];
+			locfcinfo->args[0].isnull = false;
+			element_hash = DatumGetUInt32(FunctionCallInvoke(locfcinfo));
+
+			/* We don't expect hash support functions to return null */
+			Assert(!locfcinfo->isnull);
+		}
+
+		/* see hash_array() */
+		result = (result << 5) - result + element_hash;
+	}
+
+	pfree(values);
+	pfree(nulls);
+	ReleaseTupleDesc(tupdesc);
+
+	/* Avoid leaking memory when handed toasted input. */
+	PG_FREE_IF_COPY(record, 0);
+
+	PG_RETURN_UINT32(result);
+}
+
+Datum
+hash_record_extended(PG_FUNCTION_ARGS)
+{
+	HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+	uint64		seed = PG_GETARG_INT64(1);
+	uint64		result = 0;
+	Oid			tupType;
+	int32		tupTypmod;
+	TupleDesc	tupdesc;
+	HeapTupleData tuple;
+	int			ncolumns;
+	RecordCompareData *my_extra;
+	Datum	   *values;
+	bool	   *nulls;
+
+	check_stack_depth();		/* recurses for record-type columns */
+
+	/* Extract type info from tuple */
+	tupType = HeapTupleHeaderGetTypeId(record);
+	tupTypmod = HeapTupleHeaderGetTypMod(record);
+	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+	ncolumns = tupdesc->natts;
+
+	/* Build temporary HeapTuple control structure */
+	tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+	ItemPointerSetInvalid(&(tuple.t_self));
+	tuple.t_tableOid = InvalidOid;
+	tuple.t_data = record;
+
+	/*
+	 * We arrange to look up the needed hashing info just once per series
+	 * of calls, assuming the record type doesn't change underneath us.
+	 */
+	my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+	if (my_extra == NULL ||
+		my_extra->ncolumns < ncolumns)
+	{
+		fcinfo->flinfo->fn_extra =
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   offsetof(RecordCompareData, columns) +
+							   ncolumns * sizeof(ColumnCompareData));
+		my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+		my_extra->ncolumns = ncolumns;
+		my_extra->record1_type = InvalidOid;
+		my_extra->record1_typmod = 0;
+	}
+
+	if (my_extra->record1_type != tupType ||
+		my_extra->record1_typmod != tupTypmod)
+	{
+		MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+		my_extra->record1_type = tupType;
+		my_extra->record1_typmod = tupTypmod;
+	}
+
+	/* Break down the tuple into fields */
+	values = (Datum *) palloc(ncolumns * sizeof(Datum));
+	nulls = (bool *) palloc(ncolumns * sizeof(bool));
+	heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+	for (int i = 0; i < ncolumns; i++)
+	{
+		Form_pg_attribute att;
+		TypeCacheEntry *typentry;
+		uint64		element_hash;
+
+		att = TupleDescAttr(tupdesc, i);
+
+		if (att->attisdropped)
+			continue;
+
+		/*
+		 * Lookup the hash function if not done already
+		 */
+		typentry = my_extra->columns[i].typentry;
+		if (typentry == NULL ||
+			typentry->type_id != att->atttypid)
+		{
+			typentry = lookup_type_cache(att->atttypid,
+										 TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+			if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify a hash function for type %s",
+								format_type_be(typentry->type_id))));
+			my_extra->columns[i].typentry = typentry;
+		}
+
+		/* Compute hash of element */
+		if (nulls[i])
+		{
+			element_hash = 0;
+		}
+		else
+		{
+			LOCAL_FCINFO(locfcinfo, 2);
+
+			InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2,
+									 att->attcollation, NULL, NULL);
+			locfcinfo->args[0].value = values[i];
+			locfcinfo->args[0].isnull = false;
+			locfcinfo->args[1].value = Int64GetDatum(seed);
+			locfcinfo->args[1].isnull = false;
+			element_hash = DatumGetUInt64(FunctionCallInvoke(locfcinfo));
+
+			/* We don't expect hash support functions to return null */
+			Assert(!locfcinfo->isnull);
+		}
+
+		/* see hash_array_extended() */
+		result = (result << 5) - result + element_hash;
+	}
+
+	pfree(values);
+	pfree(nulls);
+	ReleaseTupleDesc(tupdesc);
+
+	/* Avoid leaking memory when handed toasted input. */
+	PG_FREE_IF_COPY(record, 0);
+
+	PG_RETURN_UINT64(result);
+}
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index f3bf413829..24411f8cf2 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1358,13 +1358,18 @@ op_hashjoinable(Oid opno, Oid inputtype)
 	TypeCacheEntry *typentry;
 
 	/* As in op_mergejoinable, let the typcache handle the hard cases */
-	/* Eventually we'll need a similar case for record_eq ... */
 	if (opno == ARRAY_EQ_OP)
 	{
 		typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
 		if (typentry->hash_proc == F_HASH_ARRAY)
 			result = true;
 	}
+	else if (opno == RECORD_EQ_OP)
+	{
+		typentry = lookup_type_cache(inputtype, TYPECACHE_HASH_PROC);
+		if (typentry->hash_proc == F_HASH_RECORD)
+			result = true;
+	}
 	else
 	{
 		/* For all other operators, rely on pg_operator.oprcanhash */
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index f51248b70d..d0c5d3e61e 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -98,8 +98,10 @@ static TypeCacheEntry *firstDomainTypeEntry = NULL;
 #define TCFLAGS_CHECKED_FIELD_PROPERTIES	0x004000
 #define TCFLAGS_HAVE_FIELD_EQUALITY			0x008000
 #define TCFLAGS_HAVE_FIELD_COMPARE			0x010000
-#define TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS	0x020000
-#define TCFLAGS_DOMAIN_BASE_IS_COMPOSITE	0x040000
+#define TCFLAGS_HAVE_FIELD_HASHING			0x020000
+#define TCFLAGS_HAVE_FIELD_EXTENDED_HASHING	0x040000
+#define TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS	0x080000
+#define TCFLAGS_DOMAIN_BASE_IS_COMPOSITE	0x100000
 
 /* The flags associated with equality/comparison/hashing are all but these: */
 #define TCFLAGS_OPERATOR_FLAGS \
@@ -298,6 +300,8 @@ static bool array_element_has_extended_hashing(TypeCacheEntry *typentry);
 static void cache_array_element_properties(TypeCacheEntry *typentry);
 static bool record_fields_have_equality(TypeCacheEntry *typentry);
 static bool record_fields_have_compare(TypeCacheEntry *typentry);
+static bool record_fields_have_hashing(TypeCacheEntry *typentry);
+static bool record_fields_have_extended_hashing(TypeCacheEntry *typentry);
 static void cache_record_field_properties(TypeCacheEntry *typentry);
 static bool range_element_has_hashing(TypeCacheEntry *typentry);
 static bool range_element_has_extended_hashing(TypeCacheEntry *typentry);
@@ -678,18 +682,16 @@ lookup_type_cache(Oid type_id, int flags)
 										  HASHSTANDARD_PROC);
 
 		/*
-		 * As above, make sure hash_array will succeed.  We don't currently
-		 * support hashing for composite types, but when we do, we'll need
-		 * more logic here to check that case too.
+		 * As above, make sure hash_array, hash_record, or hash_range will
+		 * succeed.
 		 */
 		if (hash_proc == F_HASH_ARRAY &&
 			!array_element_has_hashing(typentry))
 			hash_proc = InvalidOid;
-
-		/*
-		 * Likewise for hash_range.
-		 */
-		if (hash_proc == F_HASH_RANGE &&
+		else if (hash_proc == F_HASH_RECORD &&
+				 !record_fields_have_hashing(typentry))
+			hash_proc = InvalidOid;
+		else if (hash_proc == F_HASH_RANGE &&
 			!range_element_has_hashing(typentry))
 			hash_proc = InvalidOid;
 
@@ -722,18 +724,16 @@ lookup_type_cache(Oid type_id, int flags)
 												   HASHEXTENDED_PROC);
 
 		/*
-		 * As above, make sure hash_array_extended will succeed.  We don't
-		 * currently support hashing for composite types, but when we do,
-		 * we'll need more logic here to check that case too.
+		 * As above, make sure hash_array_extended, hash_record_extended, or
+		 * hash_range_extended will succeed.
 		 */
 		if (hash_extended_proc == F_HASH_ARRAY_EXTENDED &&
 			!array_element_has_extended_hashing(typentry))
 			hash_extended_proc = InvalidOid;
-
-		/*
-		 * Likewise for hash_range_extended.
-		 */
-		if (hash_extended_proc == F_HASH_RANGE_EXTENDED &&
+		else if (hash_extended_proc == F_HASH_RECORD_EXTENDED &&
+			!record_fields_have_extended_hashing(typentry))
+			hash_extended_proc = InvalidOid;
+		else if (hash_extended_proc == F_HASH_RANGE_EXTENDED &&
 			!range_element_has_extended_hashing(typentry))
 			hash_extended_proc = InvalidOid;
 
@@ -1448,6 +1448,22 @@ record_fields_have_compare(TypeCacheEntry *typentry)
 	return (typentry->flags & TCFLAGS_HAVE_FIELD_COMPARE) != 0;
 }
 
+static bool
+record_fields_have_hashing(TypeCacheEntry *typentry)
+{
+	if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES))
+		cache_record_field_properties(typentry);
+	return (typentry->flags & TCFLAGS_HAVE_FIELD_HASHING) != 0;
+}
+
+static bool
+record_fields_have_extended_hashing(TypeCacheEntry *typentry)
+{
+	if (!(typentry->flags & TCFLAGS_CHECKED_FIELD_PROPERTIES))
+		cache_record_field_properties(typentry);
+	return (typentry->flags & TCFLAGS_HAVE_FIELD_EXTENDED_HASHING) != 0;
+}
+
 static void
 cache_record_field_properties(TypeCacheEntry *typentry)
 {
@@ -1457,8 +1473,12 @@ cache_record_field_properties(TypeCacheEntry *typentry)
 	 * everything will (we may get a failure at runtime ...)
 	 */
 	if (typentry->type_id == RECORDOID)
+	{
 		typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY |
-							TCFLAGS_HAVE_FIELD_COMPARE);
+							TCFLAGS_HAVE_FIELD_COMPARE |
+							TCFLAGS_HAVE_FIELD_HASHING |
+							TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
+	}
 	else if (typentry->typtype == TYPTYPE_COMPOSITE)
 	{
 		TupleDesc	tupdesc;
@@ -1475,7 +1495,9 @@ cache_record_field_properties(TypeCacheEntry *typentry)
 
 		/* Have each property if all non-dropped fields have the property */
 		newflags = (TCFLAGS_HAVE_FIELD_EQUALITY |
-					TCFLAGS_HAVE_FIELD_COMPARE);
+					TCFLAGS_HAVE_FIELD_COMPARE |
+					TCFLAGS_HAVE_FIELD_HASHING |
+					TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
 		for (i = 0; i < tupdesc->natts; i++)
 		{
 			TypeCacheEntry *fieldentry;
@@ -1486,11 +1508,17 @@ cache_record_field_properties(TypeCacheEntry *typentry)
 
 			fieldentry = lookup_type_cache(attr->atttypid,
 										   TYPECACHE_EQ_OPR |
-										   TYPECACHE_CMP_PROC);
+										   TYPECACHE_CMP_PROC |
+										   TYPECACHE_HASH_PROC |
+										   TYPECACHE_HASH_EXTENDED_PROC);
 			if (!OidIsValid(fieldentry->eq_opr))
 				newflags &= ~TCFLAGS_HAVE_FIELD_EQUALITY;
 			if (!OidIsValid(fieldentry->cmp_proc))
 				newflags &= ~TCFLAGS_HAVE_FIELD_COMPARE;
+			if (!OidIsValid(fieldentry->hash_proc))
+				newflags &= ~TCFLAGS_HAVE_FIELD_HASHING;
+			if (!OidIsValid(fieldentry->hash_extended_proc))
+				newflags &= ~TCFLAGS_HAVE_FIELD_EXTENDED_HASHING;
 
 			/* We can drop out of the loop once we disprove all bits */
 			if (newflags == 0)
@@ -1515,12 +1543,16 @@ cache_record_field_properties(TypeCacheEntry *typentry)
 		}
 		baseentry = lookup_type_cache(typentry->domainBaseType,
 									  TYPECACHE_EQ_OPR |
-									  TYPECACHE_CMP_PROC);
+									  TYPECACHE_CMP_PROC |
+									  TYPECACHE_HASH_PROC |
+									  TYPECACHE_HASH_EXTENDED_PROC);
 		if (baseentry->typtype == TYPTYPE_COMPOSITE)
 		{
 			typentry->flags |= TCFLAGS_DOMAIN_BASE_IS_COMPOSITE;
 			typentry->flags |= baseentry->flags & (TCFLAGS_HAVE_FIELD_EQUALITY |
-												   TCFLAGS_HAVE_FIELD_COMPARE);
+												   TCFLAGS_HAVE_FIELD_COMPARE |
+												   TCFLAGS_HAVE_FIELD_HASHING |
+												   TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
 		}
 	}
 	typentry->flags |= TCFLAGS_CHECKED_FIELD_PROPERTIES;
diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat
index 1dfb6fd373..dccbeabcf4 100644
--- a/src/include/catalog/pg_amop.dat
+++ b/src/include/catalog/pg_amop.dat
@@ -979,6 +979,11 @@
   amoprighttype => 'oidvector', amopstrategy => '1',
   amopopr => '=(oidvector,oidvector)', amopmethod => 'hash' },
 
+# record_ops
+{ amopfamily => 'hash/record_ops', amoplefttype => 'record',
+  amoprighttype => 'record', amopstrategy => '1',
+  amopopr => '=(record,record)', amopmethod => 'hash' },
+
 # text_ops
 { amopfamily => 'hash/text_ops', amoplefttype => 'text',
   amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index a8e0c4ff8a..db3e8c2d01 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -433,6 +433,10 @@
   amprocrighttype => 'uuid', amprocnum => '1', amproc => 'uuid_hash' },
 { amprocfamily => 'hash/uuid_ops', amproclefttype => 'uuid',
   amprocrighttype => 'uuid', amprocnum => '2', amproc => 'uuid_hash_extended' },
+{ amprocfamily => 'hash/record_ops', amproclefttype => 'record',
+  amprocrighttype => 'record', amprocnum => '1', amproc => 'hash_record' },
+{ amprocfamily => 'hash/record_ops', amproclefttype => 'record',
+  amprocrighttype => 'record', amprocnum => '2', amproc => 'hash_record_extended' },
 { amprocfamily => 'hash/pg_lsn_ops', amproclefttype => 'pg_lsn',
   amprocrighttype => 'pg_lsn', amprocnum => '1', amproc => 'pg_lsn_hash' },
 { amprocfamily => 'hash/pg_lsn_ops', amproclefttype => 'pg_lsn',
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat
index f2342bb328..be5712692f 100644
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -114,6 +114,8 @@
   opcfamily => 'hash/oidvector_ops', opcintype => 'oidvector' },
 { opcmethod => 'btree', opcname => 'record_ops',
   opcfamily => 'btree/record_ops', opcintype => 'record' },
+{ opcmethod => 'hash', opcname => 'record_ops',
+  opcfamily => 'hash/record_ops', opcintype => 'record' },
 { opcmethod => 'btree', opcname => 'record_image_ops',
   opcfamily => 'btree/record_image_ops', opcintype => 'record',
   opcdefault => 'f' },
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 7cc812adda..1ffd826679 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3064,7 +3064,7 @@
 
 # generic record comparison operators
 { oid => '2988', oid_symbol => 'RECORD_EQ_OP', descr => 'equal',
-  oprname => '=', oprcanmerge => 't', oprleft => 'record', oprright => 'record',
+  oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'record', oprright => 'record',
   oprresult => 'bool', oprcom => '=(record,record)',
   oprnegate => '<>(record,record)', oprcode => 'record_eq', oprrest => 'eqsel',
   oprjoin => 'eqjoinsel' },
diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat
index cf0fb325b3..11c7ad2c14 100644
--- a/src/include/catalog/pg_opfamily.dat
+++ b/src/include/catalog/pg_opfamily.dat
@@ -76,6 +76,8 @@
   opfmethod => 'hash', opfname => 'oidvector_ops' },
 { oid => '2994',
   opfmethod => 'btree', opfname => 'record_ops' },
+{ oid => '9611',
+  opfmethod => 'hash', opfname => 'record_ops' },
 { oid => '3194',
   opfmethod => 'btree', opfname => 'record_image_ops' },
 { oid => '1994', oid_symbol => 'TEXT_BTREE_FAM_OID',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index bbcac69d48..3712d7f1cc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9670,6 +9670,13 @@
   proname => 'btrecordcmp', prorettype => 'int4',
   proargtypes => 'record record', prosrc => 'btrecordcmp' },
 
+{ oid => '9609', descr => 'hash',
+  proname => 'hash_record', prorettype => 'int4', proargtypes => 'record',
+  prosrc => 'hash_record' },
+{ oid => '9610', descr => 'hash',
+  proname => 'hash_record_extended', prorettype => 'int8', proargtypes => 'record int8',
+  prosrc => 'hash_record_extended' },
+
 # record comparison using raw byte images
 { oid => '3181',
   proname => 'record_image_eq', prorettype => 'bool',
diff --git a/src/test/regress/expected/hash_func.out b/src/test/regress/expected/hash_func.out
index e6e3410aaa..da0d7ec05d 100644
--- a/src/test/regress/expected/hash_func.out
+++ b/src/test/regress/expected/hash_func.out
@@ -298,3 +298,15 @@ WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
 -------+----------+-----------+-----------
 (0 rows)
 
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard,
+       hash_record_extended(v, 0)::bit(32) as extended0,
+       hash_record_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1), (row(2, 'bbb')), (row(-1, 'ccc'))) x(v)
+WHERE  hash_record(v)::bit(32) != hash_record_extended(v, 0)::bit(32)
+       OR hash_record(v)::bit(32) = hash_record_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1 
+-------+----------+-----------+-----------
+(0 rows)
+
+DROP TYPE t1;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..4a375deff3 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2707,6 +2707,7 @@ select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 (5 rows)
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
                      QUERY PLAN                     
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 457f3bf04f..6a01ddfdd6 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -616,6 +616,44 @@ select * from search_graph;
  2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
 (25 rows)
 
+-- again with union distinct to test row-type hash support
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+ f | t |   label    | is_cycle |                   path                    
+---+---+------------+----------+-------------------------------------------
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(2,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(1,4)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(1,4)","(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | t        | {"(1,4)","(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | t        | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | t        | {"(5,1)","(1,4)","(4,5)","(5,1)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
+(25 rows)
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
diff --git a/src/test/regress/sql/hash_func.sql b/src/test/regress/sql/hash_func.sql
index a3e2decc2c..a4d32e7a2c 100644
--- a/src/test/regress/sql/hash_func.sql
+++ b/src/test/regress/sql/hash_func.sql
@@ -220,3 +220,12 @@ CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
         (int4range(550274, 1550274)), (int4range(1550275, 208112489))) x(v)
 WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
        OR hash_range(v)::bit(32) = hash_range_extended(v, 1)::bit(32);
+
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard,
+       hash_record_extended(v, 0)::bit(32) as extended0,
+       hash_record_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1), (row(2, 'bbb')), (row(-1, 'ccc'))) x(v)
+WHERE  hash_record(v)::bit(32) != hash_record_extended(v, 0)::bit(32)
+       OR hash_record(v)::bit(32) = hash_record_extended(v, 1)::bit(32);
+DROP TYPE t1;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1403e0ffe7..023290bd52 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -700,6 +700,7 @@ CREATE TEMP TABLE tt2 ( tt2_id int4, joincol int4 );
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 2eea297a71..7aa164b997 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -317,6 +317,16 @@ CREATE TEMPORARY TABLE tree(
 )
 select * from search_graph;
 
+-- again with union distinct to test row-type hash support
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
-- 
2.28.0

#7Tom Lane
tgl@sss.pgh.pa.us
In reply to: Peter Eisentraut (#6)
Re: Hash support for row types

Peter Eisentraut <peter.eisentraut@2ndquadrant.com> writes:

Here is an updated patch with the type cache integration added.

To your point, this now checks each field's hashability before
considering a row type as hashable. It can still have run-time errors
for untyped record datums, but that's not something we can do anything
about.

This looks good code-wise. A couple small niggles on the tests:

* The new test in with.sql claims to be testing row hashing, but
it's not very apparent that any such thing actually happens. Maybe
EXPLAIN the query, as well as execute it, to confirm that a
hash-based plan is used.

* Is it worth devising a test case in which hashing is not possible
because one of the columns isn't hashable? I have mixed feelings
about this because the set of suitable column types may decrease
to empty over time, making it hard to maintain the test case.

I marked it RFC.

regards, tom lane

#8Peter Eisentraut
peter.eisentraut@2ndquadrant.com
In reply to: Tom Lane (#7)
1 attachment(s)
Re: Hash support for row types

I wrote a new patch to add a lot more tests around hash-based plans.
This is intended to apply separately from the other patch, and the other
patch would then "flip" some of the test cases.

On 2020-11-13 20:51, Tom Lane wrote:

* The new test in with.sql claims to be testing row hashing, but
it's not very apparent that any such thing actually happens. Maybe
EXPLAIN the query, as well as execute it, to confirm that a
hash-based plan is used.

The recursive union requires hashing, but this is not visible in the
plan. You only get an error if there is no hashing support for a type.
I have added a test for this.

For the non-recursive union, I have added more tests that show this in
the plans.

* Is it worth devising a test case in which hashing is not possible
because one of the columns isn't hashable? I have mixed feelings
about this because the set of suitable column types may decrease
to empty over time, making it hard to maintain the test case.

I used the money type for now. If someone adds hash support for that,
we'll change it. I don't think this will change too rapidly, though.

--
Peter Eisentraut
2ndQuadrant, an EDB company
https://www.2ndquadrant.com/

Attachments:

0001-Add-more-tests-for-hashing-and-hash-based-plans.patchtext/plain; charset=UTF-8; name=0001-Add-more-tests-for-hashing-and-hash-based-plans.patch; x-mac-creator=0; x-mac-type=0Download
From dface8b1a8a5714001b2f6cd7f949d2f78047306 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Tue, 17 Nov 2020 13:54:52 +0100
Subject: [PATCH] Add more tests for hashing and hash-based plans

- Test hashing of an array of a non-hashable element type.

- Test UNION [DISTINCT] with hash- and sort-based plans.  (Previously,
  only INTERSECT and EXCEPT were tested there.)

- Test UNION [DISTINCT] with a non-hashable column type.  This
  currently reverts to a sort-based plan even if enable_hashagg is on.

- Test UNION/INTERSECT/EXCEPT hash- and sort-based plans with arrays
  as column types.  Also test an array with a non-hashable element
  type.

- Test UNION/INTERSECT/EXCEPT similarly with row types as column
  types.  Currently, this uses only sort-based plans because there is
  no hashing support for row types.

- Add a test case that shows that recursive queries using UNION
  [DISTINCT] require hashable column types.

- Add a currently failing test that uses UNION DISTINCT in a
  cycle-detection use case using row types as column types.

Discussion: https://www.postgresql.org/message-id/flat/38eccd35-4e2d-6767-1b3c-dada1eac3124%402ndquadrant.com
---
 src/test/regress/expected/hash_func.out |   7 +
 src/test/regress/expected/union.out     | 357 +++++++++++++++++++++++-
 src/test/regress/expected/with.out      |  20 ++
 src/test/regress/sql/hash_func.sql      |   6 +
 src/test/regress/sql/union.sql          |  92 +++++-
 src/test/regress/sql/with.sql           |  18 ++
 6 files changed, 498 insertions(+), 2 deletions(-)

diff --git a/src/test/regress/expected/hash_func.out b/src/test/regress/expected/hash_func.out
index e6e3410aaa..e7d615fde5 100644
--- a/src/test/regress/expected/hash_func.out
+++ b/src/test/regress/expected/hash_func.out
@@ -177,6 +177,13 @@ WHERE  hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)
 -------+----------+-----------+-----------
 (0 rows)
 
+-- array hashing with non-hashable element type
+SELECT v as value, hash_array(v)::bit(32) as standard
+FROM   (VALUES ('{0}'::money[])) x(v);
+ERROR:  could not identify a hash function for type money
+SELECT v as value, hash_array_extended(v, 0)::bit(32) as extended0
+FROM   (VALUES ('{0}'::money[])) x(v);
+ERROR:  could not identify an extended hash function for type money
 SELECT v as value, hashbpchar(v)::bit(32) as standard,
        hashbpcharextended(v, 0)::bit(32) as extended0,
        hashbpcharextended(v, 1)::bit(32) as extended1
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 6e72e92d80..22e1ff5c42 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -345,8 +345,28 @@ ERROR:  FOR NO KEY UPDATE is not allowed with UNION/INTERSECT/EXCEPT
         1 |        2 |        3
 (1 row)
 
--- exercise both hashed and sorted implementations of INTERSECT/EXCEPT
+-- exercise both hashed and sorted implementations of UNION/INTERSECT/EXCEPT
 set enable_hashagg to on;
+explain (costs off)
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Aggregate
+   ->  HashAggregate
+         Group Key: tenk1.unique1
+         ->  Append
+               ->  Index Only Scan using tenk1_unique1 on tenk1
+               ->  Seq Scan on tenk1 tenk1_1
+(6 rows)
+
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+ count 
+-------
+ 10000
+(1 row)
+
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -389,6 +409,27 @@ select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
 (1 row)
 
 set enable_hashagg to off;
+explain (costs off)
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Aggregate
+   ->  Unique
+         ->  Sort
+               Sort Key: tenk1.unique1
+               ->  Append
+                     ->  Index Only Scan using tenk1_unique1 on tenk1
+                     ->  Seq Scan on tenk1 tenk1_1
+(7 rows)
+
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+ count 
+-------
+ 10000
+(1 row)
+
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -434,6 +475,320 @@ select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10;
       10
 (1 row)
 
+reset enable_hashagg;
+-- non-hashable type
+set enable_hashagg to on;
+explain (costs off)
+select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+set enable_hashagg to off;
+explain (costs off)
+select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+reset enable_hashagg;
+-- arrays
+set enable_hashagg to on;
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+               QUERY PLAN                
+-----------------------------------------
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Append
+         ->  Values Scan on "*VALUES*"
+         ->  Values Scan on "*VALUES*_1"
+(5 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,4}
+ {1,2}
+ {1,3}
+(3 rows)
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ HashSetOp Intersect
+   ->  Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Values Scan on "*VALUES*"
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,2}
+(1 row)
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ HashSetOp Except
+   ->  Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Values Scan on "*VALUES*"
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,3}
+(1 row)
+
+-- non-hashable type
+explain (costs off)
+select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
+     x     
+-----------
+ {$100.00}
+ {$200.00}
+ {$300.00}
+(3 rows)
+
+set enable_hashagg to off;
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,2}
+ {1,3}
+ {1,4}
+(3 rows)
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Intersect
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,2}
+(1 row)
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Except
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+   x   
+-------
+ {1,3}
+(1 row)
+
+reset enable_hashagg;
+-- records
+set enable_hashagg to on;
+-- currently no hashing support for record, so these will still run with sort plans:
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,2)
+ (1,3)
+ (1,4)
+(3 rows)
+
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Intersect
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,2)
+(1 row)
+
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Except
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,3)
+(1 row)
+
+-- non-hashable type
+explain (costs off)
+select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+     x     
+-----------
+ ($100.00)
+ ($200.00)
+ ($300.00)
+(3 rows)
+
+set enable_hashagg to off;
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,2)
+ (1,3)
+ (1,4)
+(3 rows)
+
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Intersect
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,2)
+(1 row)
+
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Except
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
+
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+   x   
+-------
+ (1,3)
+(1 row)
+
 reset enable_hashagg;
 --
 -- Mixed types
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 457f3bf04f..1f984a9fa4 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -49,6 +49,15 @@ SELECT * FROM t;
  5
 (5 rows)
 
+-- UNION DISTINCT requires hashable type
+WITH RECURSIVE t(n) AS (
+    VALUES (1::money)
+UNION
+    SELECT n+1::money FROM t WHERE n < 100::money
+)
+SELECT sum(n) FROM t;
+ERROR:  could not implement recursive UNION
+DETAIL:  All column datatypes must be hashable.
 -- recursive view
 CREATE RECURSIVE VIEW nums (n) AS
     VALUES (1)
@@ -616,6 +625,17 @@ select * from search_graph;
  2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
 (25 rows)
 
+-- UNION DISTINCT currently not supported here because row types not hashable
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+ERROR:  could not implement recursive UNION
+DETAIL:  All column datatypes must be hashable.
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
diff --git a/src/test/regress/sql/hash_func.sql b/src/test/regress/sql/hash_func.sql
index a3e2decc2c..de84e68ba3 100644
--- a/src/test/regress/sql/hash_func.sql
+++ b/src/test/regress/sql/hash_func.sql
@@ -130,6 +130,12 @@
 WHERE  hash_array(v)::bit(32) != hash_array_extended(v, 0)::bit(32)
        OR hash_array(v)::bit(32) = hash_array_extended(v, 1)::bit(32);
 
+-- array hashing with non-hashable element type
+SELECT v as value, hash_array(v)::bit(32) as standard
+FROM   (VALUES ('{0}'::money[])) x(v);
+SELECT v as value, hash_array_extended(v, 0)::bit(32) as extended0
+FROM   (VALUES ('{0}'::money[])) x(v);
+
 SELECT v as value, hashbpchar(v)::bit(32) as standard,
        hashbpcharextended(v, 0)::bit(32) as extended0,
        hashbpcharextended(v, 1)::bit(32) as extended1
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 5f4881d594..6cee454a4c 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -118,10 +118,16 @@
 (SELECT 1,2,3 UNION SELECT 4,5,6) EXCEPT SELECT 4,5,6;
 (SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) EXCEPT SELECT 4,5,6;
 
--- exercise both hashed and sorted implementations of INTERSECT/EXCEPT
+-- exercise both hashed and sorted implementations of UNION/INTERSECT/EXCEPT
 
 set enable_hashagg to on;
 
+explain (costs off)
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -134,6 +140,12 @@
 
 set enable_hashagg to off;
 
+explain (costs off)
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+select count(*) from
+  ( select unique1 from tenk1 union select fivethous from tenk1 ) ss;
+
 explain (costs off)
 select count(*) from
   ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss;
@@ -146,6 +158,84 @@
 
 reset enable_hashagg;
 
+-- non-hashable type
+set enable_hashagg to on;
+
+explain (costs off)
+select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
+
+set enable_hashagg to off;
+
+explain (costs off)
+select x from (values (100::money), (200::money)) _(x) union select x from (values (100::money), (300::money)) _(x);
+
+reset enable_hashagg;
+
+-- arrays
+set enable_hashagg to on;
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+
+-- non-hashable type
+explain (costs off)
+select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
+select x from (values (array[100::money]), (array[200::money])) _(x) union select x from (values (array[100::money]), (array[300::money])) _(x);
+
+set enable_hashagg to off;
+
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) union select x from (values (array[1, 2]), (array[1, 4])) _(x);
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) intersect select x from (values (array[1, 2]), (array[1, 4])) _(x);
+explain (costs off)
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (values (array[1, 2]), (array[1, 4])) _(x);
+
+reset enable_hashagg;
+
+-- records
+set enable_hashagg to on;
+
+-- currently no hashing support for record, so these will still run with sort plans:
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+
+-- non-hashable type
+explain (costs off)
+select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+
+set enable_hashagg to off;
+
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
+explain (costs off)
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
+
+reset enable_hashagg;
+
 --
 -- Mixed types
 --
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 2eea297a71..c6ce01a2d1 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -31,6 +31,14 @@
 )
 SELECT * FROM t;
 
+-- UNION DISTINCT requires hashable type
+WITH RECURSIVE t(n) AS (
+    VALUES (1::money)
+UNION
+    SELECT n+1::money FROM t WHERE n < 100::money
+)
+SELECT sum(n) FROM t;
+
 -- recursive view
 CREATE RECURSIVE VIEW nums (n) AS
     VALUES (1)
@@ -317,6 +325,16 @@ CREATE TEMPORARY TABLE tree(
 )
 select * from search_graph;
 
+-- UNION DISTINCT currently not supported here because row types not hashable
+with recursive search_graph(f, t, label, is_cycle, path) as (
+	select *, false, array[row(g.f, g.t)] from graph g
+	union distinct
+	select g.*, row(g.f, g.t) = any(path), path || row(g.f, g.t)
+	from graph g, search_graph sg
+	where g.f = sg.t and not is_cycle
+)
+select * from search_graph;
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
-- 
2.29.2

#9Tom Lane
tgl@sss.pgh.pa.us
In reply to: Peter Eisentraut (#8)
Re: Hash support for row types

Peter Eisentraut <peter.eisentraut@2ndquadrant.com> writes:

I wrote a new patch to add a lot more tests around hash-based plans.
This is intended to apply separately from the other patch, and the other
patch would then "flip" some of the test cases.

OK, that addresses my concerns.

regards, tom lane

#10Peter Eisentraut
peter.eisentraut@2ndquadrant.com
In reply to: Tom Lane (#9)
Re: Hash support for row types

On 2020-11-17 20:33, Tom Lane wrote:

Peter Eisentraut <peter.eisentraut@2ndquadrant.com> writes:

I wrote a new patch to add a lot more tests around hash-based plans.
This is intended to apply separately from the other patch, and the other
patch would then "flip" some of the test cases.

OK, that addresses my concerns.

Thanks. I have committed the tests and then subsequently the feature patch.

--
Peter Eisentraut
2ndQuadrant, an EDB company
https://www.2ndquadrant.com/