[PATCH] ltree hash functions
Hi All,
I've written a patch to add hash functions for the ltree extension. It adds
support for hash indexes and hash aggregation. I've reused the existing
logic that's used to hash arrays and added tests that mirror elsewhere
(i.e. hstore and hash_func regression tests).
The patch doesn't currently support hash joins as the ltree = operator was
created without support for it. The ALTER OPERATOR command doesn't support
changing the hash join support, so I'm not sure what the best strategy to
change it is. Is it ok to update the operator's row in the pg_operator
system catalog or is there a better way to change this that someone could
recommend?
Any comments on the overall approach or other feedback would be appreciated.
Thanks,
Tommy
Attachments:
ltree-hash-v1.patchapplication/octet-stream; name=ltree-hash-v1.patchDownload
diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a730..f50f2c9470 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030cf..b0f99afea1 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1
+-------+----------+-----------+-----------
+(0 rows)
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count
+-------
+ 1006
+(1 row)
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@@ -7832,6 +7851,26 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
+drop index tstidx;
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ t
+------
+ 12.3
+(1 row)
+
+set enable_hashagg = true;
+set enable_sort = false;
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count
+-------
+ 1006
+(1 row)
+
+set enable_sort = true;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 0000000000..d9111551dd
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,21 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION ltree_hash(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION ltree_hash_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+ OPERATOR 1 = ,
+ FUNCTION 1 ltree_hash(ltree),
+ FUNCTION 2 ltree_hash_extended(ltree, bigint);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d64781..c2cbeda96c 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fcd2..4199c70f7f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(ltree_hash);
+PG_FUNCTION_INFO_V1(ltree_hash_extended);
int
ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) selec);
}
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+ltree_hash(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+
+ uint32 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ while (an > 0)
+ {
+ uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and ltree_hash). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match ltree_hash when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+ltree_hash_extended(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+ const uint64 seed = PG_GETARG_INT64(1);
+
+ uint64 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ if (an == 0)
+ {
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result + seed);
+ }
+
+ while (an > 0)
+ {
+ uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf3c..388d5bb6f5 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6c4..fa0b9dff91 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@@ -328,6 +341,20 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
+drop index tstidx;
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+
+set enable_hashagg = true;
+set enable_sort = false;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+set enable_sort = true;
+
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70da..5c109d2d5f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation
<literal>>=</literal>, <literal>></literal>
</para>
</listitem>
+ <listitem>
+ <para>
+ Hash index over <type>ltree</type>:
+ <literal>=</literal>
+ </para>
+ </listitem>
+
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>
Hi,
I've created a CF entry for the patch:
https://commitfest.postgresql.org/43/4375/
I only briefly skimmed the code, so a couple comments.
On 6/17/23 17:45, Tommy Pavlicek wrote:
Hi All,
I've written a patch to add hash functions for the ltree extension. It
adds support for hash indexes and hash aggregation. I've reused the
existing logic that's used to hash arrays and added tests that mirror
elsewhere (i.e. hstore and hash_func regression tests).
Reusing code/logic is the right approach, IMHO.
The patch doesn't currently support hash joins as the ltree = operator
was created without support for it. The ALTER OPERATOR command doesn't
support changing the hash join support, so I'm not sure what the best
strategy to change it is. Is it ok to update the operator's row in the
pg_operator system catalog or is there a better way to change this that
someone could recommend?
I guess the "correct" solution would be to extend ALTER OPERATOR. I
wonder why it's not supported - it's clearly an intentional decision
(per comment in AlterOperator). So what might break if this changes for
an existing operator?
FWIW the CREATE OPERATOR documentation only talks about hash joins for
HASHES, maybe it should be updated to also mention hash aggregates?
Any comments on the overall approach or other feedback would be appreciated.
I wonder what's the use case for this. I wonder how often people join on
ltree, for example. Did you just notice ltree can't hash and decided to
fix that, or do you have a practical use case / need for this feature?
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
I guess the "correct" solution would be to extend ALTER OPERATOR. I
wonder why it's not supported - it's clearly an intentional decision
(per comment in AlterOperator). So what might break if this changes for
an existing operator?
This code was added by commit 321eed5f0. The thread leading up to
that commit is here:
/messages/by-id/3348985.V7xMLFDaJO@dinodell
There are some nontrivial concerns in there about breaking the
semantics of existing exclusion constraints, for instance. I think
we mostly rejected the concern about invalidation of cached plans
as already-covered, but that wasn't the only problem.
However, I think we could largely ignore the issues if we restricted
ALTER OPERATOR to only add commutator, negator, hashes, or merges
properties to operators that lacked them before --- which'd be the
primary if not only use-case anyway. That direction can't break
anything.
regards, tom lane
On 6/17/23 20:19, Tom Lane wrote:
Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
I guess the "correct" solution would be to extend ALTER OPERATOR. I
wonder why it's not supported - it's clearly an intentional decision
(per comment in AlterOperator). So what might break if this changes for
an existing operator?This code was added by commit 321eed5f0. The thread leading up to
that commit is here:/messages/by-id/3348985.V7xMLFDaJO@dinodell
There are some nontrivial concerns in there about breaking the
semantics of existing exclusion constraints, for instance. I think
we mostly rejected the concern about invalidation of cached plans
as already-covered, but that wasn't the only problem.However, I think we could largely ignore the issues if we restricted
ALTER OPERATOR to only add commutator, negator, hashes, or merges
properties to operators that lacked them before --- which'd be the
primary if not only use-case anyway. That direction can't break
anything.
Sound reasonable.
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
FWIW the CREATE OPERATOR documentation only talks about hash joins for
HASHES, maybe it should be updated to also mention hash aggregates?
I think I might have been a bit unclear here, the hash aggregate does work
without altering the operator so it's just the join that's blocked. Sorry
about the confusion.
I wonder what's the use case for this. I wonder how often people join on
ltree, for example. Did you just notice ltree can't hash and decided to
fix that, or do you have a practical use case / need for this feature?
I mostly want to add hash indexes. Beyond selecting specific values, you
can use them to get ancestors (trim the path and do an exact select) and
descendents (using a functional index calculating the parent path for each
row). For example, I've found it can be faster to calculate the path of
every ancestor and use select ltree path = ANY([ancestor paths]) compared
to using a gist index. It's not ideal, but unfortunately I've found that
with enough rows, gist indexes get very large and slow. Btree indexes are
better, but for ltree they can still be up to around 10x bigger than a hash
index. I've also seen ltree hash indexes outperform btree indexes in very
large tables, but I suspect in most cases they'll be similar.
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?
Yes, I can do that. I took a look over the code and email thread and it
seems like it should be relatively straight forward. I'll put a patch
together for that and then update this patch to alter the operator.
On Sat, Jun 17, 2023 at 9:57 PM Tomas Vondra <tomas.vondra@enterprisedb.com>
wrote:
Show quoted text
On 6/17/23 20:19, Tom Lane wrote:
Tomas Vondra <tomas.vondra@enterprisedb.com> writes:
I guess the "correct" solution would be to extend ALTER OPERATOR. I
wonder why it's not supported - it's clearly an intentional decision
(per comment in AlterOperator). So what might break if this changes for
an existing operator?This code was added by commit 321eed5f0. The thread leading up to
that commit is here:/messages/by-id/3348985.V7xMLFDaJO@dinodell
There are some nontrivial concerns in there about breaking the
semantics of existing exclusion constraints, for instance. I think
we mostly rejected the concern about invalidation of cached plans
as already-covered, but that wasn't the only problem.However, I think we could largely ignore the issues if we restricted
ALTER OPERATOR to only add commutator, negator, hashes, or merges
properties to operators that lacked them before --- which'd be the
primary if not only use-case anyway. That direction can't break
anything.Sound reasonable.
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
On 19 Jun 2023, at 11:18, Tommy Pavlicek <tommypav122@gmail.com> wrote:
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?Yes, I can do that. I took a look over the code and email thread and it seems like it should be relatively straight forward. I'll put a patch together for that and then update this patch to alter the operator.
Did you have a chance to look at this for an updated patch for this commitfest?
--
Daniel Gustafsson
On Thu, Jul 6, 2023 at 2:18 AM Daniel Gustafsson <daniel@yesql.se> wrote:
On 19 Jun 2023, at 11:18, Tommy Pavlicek <tommypav122@gmail.com> wrote:
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?Yes, I can do that. I took a look over the code and email thread and it seems like it should be relatively straight forward. I'll put a patch together for that and then update this patch to alter the operator.
Did you have a chance to look at this for an updated patch for this commitfest?
I finally had a chance to look at this and I've updated the patch to
alter the = operator to enable hash joins.
This is ready to be looked at now.
Is there anything I need to do to move this forward?
Cheers,
Tommy
Attachments:
0001-ltree-hash-v2.patchapplication/octet-stream; name=0001-ltree-hash-v2.patchDownload
diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a730..f50f2c9470 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030cf..b0f99afea1 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1
+-------+----------+-----------+-----------
+(0 rows)
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count
+-------
+ 1006
+(1 row)
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@@ -7832,6 +7851,26 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
+drop index tstidx;
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ t
+------
+ 12.3
+(1 row)
+
+set enable_hashagg = true;
+set enable_sort = false;
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count
+-------
+ 1006
+(1 row)
+
+set enable_sort = true;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 0000000000..8dd1b645c1
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,23 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION ltree_hash(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION ltree_hash_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+ OPERATOR 1 = ,
+ FUNCTION 1 ltree_hash(ltree),
+ FUNCTION 2 ltree_hash_extended(ltree, bigint);
+
+ALTER OPERATOR =(ltree, ltree) SET (HASHES);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d64781..c2cbeda96c 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fcd2..4199c70f7f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(ltree_hash);
+PG_FUNCTION_INFO_V1(ltree_hash_extended);
int
ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) selec);
}
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+ltree_hash(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+
+ uint32 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ while (an > 0)
+ {
+ uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and ltree_hash). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match ltree_hash when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+ltree_hash_extended(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+ const uint64 seed = PG_GETARG_INT64(1);
+
+ uint64 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ if (an == 0)
+ {
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result + seed);
+ }
+
+ while (an > 0)
+ {
+ uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf3c..388d5bb6f5 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6c4..fa0b9dff91 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@@ -328,6 +341,20 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
+drop index tstidx;
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+
+set enable_hashagg = true;
+set enable_sort = false;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+set enable_sort = true;
+
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70da..5c109d2d5f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation
<literal>>=</literal>, <literal>></literal>
</para>
</listitem>
+ <listitem>
+ <para>
+ Hash index over <type>ltree</type>:
+ <literal>=</literal>
+ </para>
+ </listitem>
+
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>
On Wed, Nov 29, 2023 at 6:09 AM Tommy Pavlicek <tommypav122@gmail.com> wrote:
On Thu, Jul 6, 2023 at 2:18 AM Daniel Gustafsson <daniel@yesql.se> wrote:
On 19 Jun 2023, at 11:18, Tommy Pavlicek <tommypav122@gmail.com> wrote:
Tommy, are you interested in extending ALTER OPERATOR to allow this,
which would also allow fixing the ltree operator?Yes, I can do that. I took a look over the code and email thread and it seems like it should be relatively straight forward. I'll put a patch together for that and then update this patch to alter the operator.
Did you have a chance to look at this for an updated patch for this commitfest?
I finally had a chance to look at this and I've updated the patch to
alter the = operator to enable hash joins.This is ready to be looked at now.
Is there anything I need to do to move this forward?
you only change Makefile, you also need to change contrib/ltree/meson.build?
+drop index tstidx;
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
Do you need to use EXPLAIN to demo the index usage?
On Tue, Nov 28, 2023 at 7:38 PM jian he <jian.universality@gmail.com> wrote:
you only change Makefile, you also need to change contrib/ltree/meson.build?
Do you need to use EXPLAIN to demo the index usage?
Thanks! Yes, I missed the Meson build file. I added additional
commands with EXPLAIN (COSTS OFF) as I found in other places.
Patch updated for those comments (and a touch of cleanup in the tests) attached.
Attachments:
0001-ltree-hash-v3.patchapplication/octet-stream; name=0001-ltree-hash-v3.patchDownload
diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a730..f50f2c9470 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030cf..e0152e16fa 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1
+-------+----------+-----------+-----------
+(0 rows)
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count
+-------
+ 1006
+(1 row)
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@@ -7832,6 +7851,52 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
+drop index tstidx;
+--- test hash index
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ QUERY PLAN
+--------------------------------------
+ Index Scan using tstidx on ltreetest
+ Index Cond: (t = '12.3'::ltree)
+(2 rows)
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ t
+------
+ 12.3
+(1 row)
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+-- test hash aggregate
+set enable_hashagg=on;
+set enable_sort=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ QUERY PLAN
+-----------------------------------------------------
+ Aggregate
+ -> HashAggregate
+ Group Key: ltreetest.t
+ -> Append
+ -> Seq Scan on ltreetest
+ -> Seq Scan on ltreetest ltreetest_1
+(6 rows)
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count
+-------
+ 1006
+(1 row)
+
+set enable_sort=on;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 0000000000..8dd1b645c1
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,23 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION ltree_hash(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION ltree_hash_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+ OPERATOR 1 = ,
+ FUNCTION 1 ltree_hash(ltree),
+ FUNCTION 2 ltree_hash_extended(ltree, bigint);
+
+ALTER OPERATOR =(ltree, ltree) SET (HASHES);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d64781..c2cbeda96c 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fcd2..4199c70f7f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(ltree_hash);
+PG_FUNCTION_INFO_V1(ltree_hash_extended);
int
ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) selec);
}
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+ltree_hash(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+
+ uint32 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ while (an > 0)
+ {
+ uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and ltree_hash). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match ltree_hash when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+ltree_hash_extended(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+ const uint64 seed = PG_GETARG_INT64(1);
+
+ uint64 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ if (an == 0)
+ {
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result + seed);
+ }
+
+ while (an > 0)
+ {
+ uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf3c..388d5bb6f5 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build
index 44d0337e6e..e09642a18a 100644
--- a/contrib/ltree/meson.build
+++ b/contrib/ltree/meson.build
@@ -32,6 +32,7 @@ install_data(
'ltree--1.0--1.1.sql',
'ltree--1.1--1.2.sql',
'ltree--1.1.sql',
+ 'ltree--1.2--1.3.sql',
kwargs: contrib_data_args,
)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6c4..33db07d977 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
+-- Check that the ltree_hash() and ltree_hash_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, ltree_hash(v)::bit(32) as standard,
+ ltree_hash_extended(v, 0)::bit(32) as extended0,
+ ltree_hash_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE ltree_hash(v)::bit(32) != ltree_hash_extended(v, 0)::bit(32)
+ OR ltree_hash(v)::bit(32) = ltree_hash_extended(v, 1)::bit(32);
+
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@@ -328,6 +341,36 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
+drop index tstidx;
+
+--- test hash index
+
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+
+-- test hash aggregate
+
+set enable_hashagg=on;
+set enable_sort=off;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+set enable_sort=on;
+
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70da..5c109d2d5f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation
<literal>>=</literal>, <literal>></literal>
</para>
</listitem>
+ <listitem>
+ <para>
+ Hash index over <type>ltree</type>:
+ <literal>=</literal>
+ </para>
+ </listitem>
+
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>
On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav122@gmail.com> wrote:
Patch updated for those comments (and a touch of cleanup in the tests) attached.
it would be a better name as hash_ltree than ltree_hash, similar logic
applies to ltree_hash_extended.
that would be the convention. see: https://stackoverflow.com/a/69650940/15603477
Other than that, it looks good.
Thanks.
I've attached the latest version that updates the naming in line with
the convention.
Show quoted text
On Mon, Dec 4, 2023 at 12:46 AM jian he <jian.universality@gmail.com> wrote:
On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav122@gmail.com> wrote:
Patch updated for those comments (and a touch of cleanup in the tests) attached.
it would be a better name as hash_ltree than ltree_hash, similar logic
applies to ltree_hash_extended.
that would be the convention. see: https://stackoverflow.com/a/69650940/15603477Other than that, it looks good.
Attachments:
0001-ltree-hash-v4.patchapplication/octet-stream; name=0001-ltree-hash-v4.patchDownload
diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a730..f50f2c9470 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030cf..e8b091d8fe 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+ hash_ltree_extended(v, 0)::bit(32) as extended0,
+ hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+ OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1
+-------+----------+-----------+-----------
+(0 rows)
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count
+-------
+ 1006
+(1 row)
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@@ -7832,6 +7851,52 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
+drop index tstidx;
+--- test hash index
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ QUERY PLAN
+--------------------------------------
+ Index Scan using tstidx on ltreetest
+ Index Cond: (t = '12.3'::ltree)
+(2 rows)
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ t
+------
+ 12.3
+(1 row)
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+-- test hash aggregate
+set enable_hashagg=on;
+set enable_sort=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ QUERY PLAN
+-----------------------------------------------------
+ Aggregate
+ -> HashAggregate
+ Group Key: ltreetest.t
+ -> Append
+ -> Seq Scan on ltreetest
+ -> Seq Scan on ltreetest ltreetest_1
+(6 rows)
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count
+-------
+ 1006
+(1 row)
+
+set enable_sort=on;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 0000000000..bc9a34dd59
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,23 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION hash_ltree(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION hash_ltree_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+ OPERATOR 1 = ,
+ FUNCTION 1 hash_ltree(ltree),
+ FUNCTION 2 hash_ltree_extended(ltree, bigint);
+
+ALTER OPERATOR =(ltree, ltree) SET (HASHES);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d64781..c2cbeda96c 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fcd2..93043e1b91 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(hash_ltree);
+PG_FUNCTION_INFO_V1(hash_ltree_extended);
int
ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) selec);
}
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+hash_ltree(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+
+ uint32 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ while (an > 0)
+ {
+ uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and hash_ltree). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match hash_ltree when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+hash_ltree_extended(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+ const uint64 seed = PG_GETARG_INT64(1);
+
+ uint64 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ if (an == 0)
+ {
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result + seed);
+ }
+
+ while (an > 0)
+ {
+ uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf3c..388d5bb6f5 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build
index 44d0337e6e..e09642a18a 100644
--- a/contrib/ltree/meson.build
+++ b/contrib/ltree/meson.build
@@ -32,6 +32,7 @@ install_data(
'ltree--1.0--1.1.sql',
'ltree--1.1--1.2.sql',
'ltree--1.1.sql',
+ 'ltree--1.2--1.3.sql',
kwargs: contrib_data_args,
)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6c4..d0f2b906a5 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+ hash_ltree_extended(v, 0)::bit(32) as extended0,
+ hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+ OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@@ -328,6 +341,36 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
+drop index tstidx;
+
+--- test hash index
+
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+
+-- test hash aggregate
+
+set enable_hashagg=on;
+set enable_sort=off;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+set enable_sort=on;
+
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70da..5c109d2d5f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation
<literal>>=</literal>, <literal>></literal>
</para>
</listitem>
+ <listitem>
+ <para>
+ Hash index over <type>ltree</type>:
+ <literal>=</literal>
+ </para>
+ </listitem>
+
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>
On Wed, 6 Dec 2023 at 04:08, Tommy Pavlicek <tommypav122@gmail.com> wrote:
Thanks.
I've attached the latest version that updates the naming in line with
the convention.On Mon, Dec 4, 2023 at 12:46 AM jian he <jian.universality@gmail.com> wrote:
On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav122@gmail.com> wrote:
Patch updated for those comments (and a touch of cleanup in the tests) attached.
it would be a better name as hash_ltree than ltree_hash, similar logic
applies to ltree_hash_extended.
that would be the convention. see: https://stackoverflow.com/a/69650940/15603477Other than that, it looks good.
CFBot shows one of the test is failing as in [1]:
diff -U3 /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out
/tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out
--- /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out 2024-01-31
15:18:42.893039599 +0000
+++ /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out
2024-01-31 15:23:25.309028749 +0000
@@ -1442,9 +1442,14 @@
('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
- value | standard | extended0 | extended1
--------+----------+-----------+-----------
-(0 rows)
+ value | standard |
extended0 | extended1
+-------------+----------------------------------+----------------------------------+----------------------------------
+ 0 | 10001010100010010000000000001011 |
01011001111001000100011001011011 | 01011001111001000100011010011111
+ 0.1 | 10100000111110001010110001001110 |
00111100100010001100110111010101 | 00111100100010001101100011010101
+ 0.1.2 | 01111000011100000101111101110100 |
10101110011101011000000011010111 | 10101110011101110010001111000011
+ 0 | 10001010100010010000000000001011 |
01011001111001000100011001011011 | 01011001111001000100011010011111
+ 0_asd.1_ASD | 01000010001010000000101001001101 |
00111100100010001100110111010101 | 00111100100010001101100011010101
+(5 rows)
Please post an updated version for the same.
Regards,
Vignesh
On Thu, Feb 1, 2024 at 11:11 PM vignesh C <vignesh21@gmail.com> wrote:
On Wed, 6 Dec 2023 at 04:08, Tommy Pavlicek <tommypav122@gmail.com> wrote:
Thanks.
I've attached the latest version that updates the naming in line with
the convention.On Mon, Dec 4, 2023 at 12:46 AM jian he <jian.universality@gmail.com> wrote:
On Fri, Dec 1, 2023 at 8:44 AM Tommy Pavlicek <tommypav122@gmail.com> wrote:
Patch updated for those comments (and a touch of cleanup in the tests) attached.
it would be a better name as hash_ltree than ltree_hash, similar logic
applies to ltree_hash_extended.
that would be the convention. see: https://stackoverflow.com/a/69650940/15603477Other than that, it looks good.
CFBot shows one of the test is failing as in [1]: diff -U3 /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out --- /tmp/cirrus-ci-build/contrib/ltree/expected/ltree.out 2024-01-31 15:18:42.893039599 +0000 +++ /tmp/cirrus-ci-build/build-32/testrun/ltree/regress/results/ltree.out 2024-01-31 15:23:25.309028749 +0000 @@ -1442,9 +1442,14 @@ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); - value | standard | extended0 | extended1 --------+----------+-----------+----------- -(0 rows) + value | standard | extended0 | extended1 +-------------+----------------------------------+----------------------------------+---------------------------------- + 0 | 10001010100010010000000000001011 | 01011001111001000100011001011011 | 01011001111001000100011010011111 + 0.1 | 10100000111110001010110001001110 | 00111100100010001100110111010101 | 00111100100010001101100011010101 + 0.1.2 | 01111000011100000101111101110100 | 10101110011101011000000011010111 | 10101110011101110010001111000011 + 0 | 10001010100010010000000000001011 | 01011001111001000100011001011011 | 01011001111001000100011010011111 + 0_asd.1_ASD | 01000010001010000000101001001101 | 00111100100010001100110111010101 | 00111100100010001101100011010101 +(5 rows)Please post an updated version for the same.
It only fails on Linux - Debian Bullseye - Meson.
I fixed the white space, named it v5.
I also made the following changes:
from
uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
to
uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *)
al->name, al->len, seed));
uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name,
al->len));
(these two line live in different functions)
I have some problems testing it locally, so I post the patch.
Attachments:
v5-0001-add-hash-functions-for-the-ltree-extension.patchapplication/x-patch; name=v5-0001-add-hash-functions-for-the-ltree-extension.patchDownload
From ffc0e3daea5e490d8b9e6b7a0345f780d88b5624 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Fri, 2 Feb 2024 16:03:56 +0800
Subject: [PATCH v5 1/1] add hash functions for the ltree extension.
---
contrib/ltree/Makefile | 2 +-
contrib/ltree/expected/ltree.out | 65 +++++++++++++++++++++++++++++
contrib/ltree/ltree--1.2--1.3.sql | 23 +++++++++++
contrib/ltree/ltree.control | 2 +-
contrib/ltree/ltree_op.c | 68 +++++++++++++++++++++++++++++++
contrib/ltree/ltreetest.sql | 1 +
contrib/ltree/meson.build | 1 +
contrib/ltree/sql/ltree.sql | 43 +++++++++++++++++++
doc/src/sgml/ltree.sgml | 8 ++++
9 files changed, 211 insertions(+), 2 deletions(-)
create mode 100644 contrib/ltree/ltree--1.2--1.3.sql
diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile
index 770769a7..f50f2c94 100644
--- a/contrib/ltree/Makefile
+++ b/contrib/ltree/Makefile
@@ -14,7 +14,7 @@ OBJS = \
ltxtquery_op.o
EXTENSION = ltree
-DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
+DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql
PGFILEDESC = "ltree - hierarchical label data type"
HEADERS = ltree.h
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 984cd030..dd4f0fc1 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
g.b.c.d.e
(1 row)
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+ hash_ltree_extended(v, 0)::bit(32) as extended0,
+ hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+ OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1
+-------+----------+-----------+-----------
+(0 rows)
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+ count
+-------
+ 1006
+(1 row)
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
t
----------------------------------
@@ -7832,6 +7851,52 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
23.3.32.21.5.14.10.17.1
(4 rows)
+drop index tstidx;
+--- test hash index
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ QUERY PLAN
+--------------------------------------
+ Index Scan using tstidx on ltreetest
+ Index Cond: (t = '12.3'::ltree)
+(2 rows)
+
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+ t
+------
+ 12.3
+(1 row)
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+-- test hash aggregate
+set enable_hashagg=on;
+set enable_sort=off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ QUERY PLAN
+-----------------------------------------------------
+ Aggregate
+ -> HashAggregate
+ Group Key: ltreetest.t
+ -> Append
+ -> Seq Scan on ltreetest
+ -> Seq Scan on ltreetest ltreetest_1
+(6 rows)
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+ count
+-------
+ 1006
+(1 row)
+
+set enable_sort=on;
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
ERROR: value 0 out of bounds for option "siglen"
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql
new file mode 100644
index 00000000..bc9a34dd
--- /dev/null
+++ b/contrib/ltree/ltree--1.2--1.3.sql
@@ -0,0 +1,23 @@
+/* contrib/ltree/ltree--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION hash_ltree(ltree)
+RETURNS integer
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION hash_ltree_extended(ltree, bigint)
+RETURNS bigint
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE OPERATOR CLASS hash_ltree_ops
+DEFAULT FOR TYPE ltree USING hash
+AS
+ OPERATOR 1 = ,
+ FUNCTION 1 hash_ltree(ltree),
+ FUNCTION 2 hash_ltree_extended(ltree, bigint);
+
+ALTER OPERATOR =(ltree, ltree) SET (HASHES);
diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control
index b408d647..c2cbeda9 100644
--- a/contrib/ltree/ltree.control
+++ b/contrib/ltree/ltree.control
@@ -1,6 +1,6 @@
# ltree extension
comment = 'data type for hierarchical tree-like structures'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/ltree'
relocatable = true
trusted = true
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index da1db5fc..4c464153 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -9,6 +9,7 @@
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
+#include "common/hashfn.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -37,6 +38,8 @@ PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
+PG_FUNCTION_INFO_V1(hash_ltree);
+PG_FUNCTION_INFO_V1(hash_ltree_extended);
int
ltree_compare(const ltree *a, const ltree *b)
@@ -588,3 +591,68 @@ ltreeparentsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((float8) selec);
}
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array
+ */
+Datum
+hash_ltree(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+
+ uint32 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ while (an > 0)
+ {
+ uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len));
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT32(result);
+}
+
+
+/*
+ * Hashes the elements of the path using the same logic as hash_array_extended
+ * (and hash_ltree). It differs from hash_array_extended only in how it handles
+ * zero length label paths. We return 1 + seed to ensure that the low 32 bits
+ * of the result match hash_ltree when the seed is 0, as required by the hash
+ * index support functions, but to also return a different value when there is
+ * a seed.
+ */
+Datum
+hash_ltree_extended(PG_FUNCTION_ARGS)
+{
+ ltree *a = PG_GETARG_LTREE_P(0);
+ const uint64 seed = PG_GETARG_INT64(1);
+
+ uint64 result = 1;
+ int an = a->numlevel;
+ ltree_level *al = LTREE_FIRST(a);
+
+ if (an == 0)
+ {
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result + seed);
+ }
+
+ while (an > 0)
+ {
+ uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed));
+
+ result = (result << 5) - result + levelHash;
+
+ an--;
+ al = LEVEL_NEXT(al);
+ }
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_RETURN_UINT64(result);
+}
diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql
index d6996caf..388d5bb6 100644
--- a/contrib/ltree/ltreetest.sql
+++ b/contrib/ltree/ltreetest.sql
@@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING gist(path);
CREATE INDEX path_idx ON test USING btree(path);
+CREATE INDEX path_hash_idx ON test USING hash(path);
diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build
index 5862943e..29cc6865 100644
--- a/contrib/ltree/meson.build
+++ b/contrib/ltree/meson.build
@@ -32,6 +32,7 @@ install_data(
'ltree--1.0--1.1.sql',
'ltree--1.1--1.2.sql',
'ltree--1.1.sql',
+ 'ltree--1.2--1.3.sql',
kwargs: contrib_data_args,
)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 402096f6..869893db 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -282,9 +282,22 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null;
SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery;
SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e';
+-- Check that the hash_ltree() and hash_ltree_extended() function's lower
+-- 32 bits match when the seed is 0 and do not match when the seed != 0
+SELECT v as value, hash_ltree(v)::bit(32) as standard,
+ hash_ltree_extended(v, 0)::bit(32) as extended0,
+ hash_ltree_extended(v, 1)::bit(32) as extended1
+FROM (VALUES (NULL::ltree), (''::ltree), ('0'::ltree), ('0.1'::ltree),
+ ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v)
+WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32)
+ OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32);
+
+
CREATE TABLE ltreetest (t ltree);
\copy ltreetest FROM 'data/ltree.data'
+SELECT count(*) from ltreetest;
+
SELECT * FROM ltreetest WHERE t < '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc;
SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
@@ -328,6 +341,36 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.1' order by t asc;
SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc;
SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc;
+drop index tstidx;
+
+--- test hash index
+
+create index tstidx on ltreetest using hash (t);
+set enable_seqscan=off;
+set enable_bitmapscan=off;
+
+EXPLAIN (COSTS OFF) SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+SELECT * FROM ltreetest WHERE t = '12.3' order by t asc;
+
+set enable_seqscan=on;
+set enable_bitmapscan=on;
+
+-- test hash aggregate
+
+set enable_hashagg=on;
+set enable_sort=off;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+SELECT count(*) FROM (
+SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t
+) t2;
+
+set enable_sort=on;
+
drop index tstidx;
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0));
create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025));
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 00a6ae70..9584105b 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation
<literal>>=</literal>, <literal>></literal>
</para>
</listitem>
+ <listitem>
+ <para>
+ Hash index over <type>ltree</type>:
+ <literal>=</literal>
+ </para>
+ </listitem>
+
<listitem>
<para>
GiST index over <type>ltree</type> (<literal>gist_ltree_ops</literal>
@@ -712,6 +719,7 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies');
INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts');
CREATE INDEX path_gist_idx ON test USING GIST (path);
CREATE INDEX path_idx ON test USING BTREE (path);
+CREATE INDEX path_hash_idx ON test USING HASH (path);
</programlisting>
<para>
--
2.34.1
jian he <jian.universality@gmail.com> writes:
I also made the following changes:
from
uint64 levelHash = hash_any_extended((unsigned char *) al->name, al->len, seed);
uint32 levelHash = hash_any((unsigned char *) al->name, al->len);
to
uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *)
al->name, al->len, seed));
uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name,
al->len));
Yeah, that'd fail on 32-bit machines.
Pushed v5 with some minor cosmetic tweaking.
regards, tom lane