Change GUC hashtable to use simplehash?
I had briefly experimented with changing the hash table in guc.c to use
simplehash. It didn't offer any measurable speedup, but the API is
slightly nicer.
I thought I'd post the patch in case others thought this was a good
direction or nice cleanup.
--
Jeff Davis
PostgreSQL Contributor Team - AWS
Attachments:
v2-0001-Convert-GUC-hashtable-to-use-simplehash.patch (text/x-patch)
From 54f082288f43b14bb0d0cca20c960c862db1f3d9 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 2 Aug 2023 23:04:06 -0700
Subject: [PATCH v2] Convert GUC hashtable to use simplehash.
---
src/backend/utils/misc/guc.c | 141 ++++++++++++++---------------------
1 file changed, 56 insertions(+), 85 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 82d8efbc96..7295b0f00e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -202,9 +202,10 @@ typedef struct
{
const char *gucname; /* hash key */
struct config_generic *gucvar; /* -> GUC's defining structure */
-} GUCHashEntry;
-static HTAB *guc_hashtab; /* entries are GUCHashEntrys */
+ /* needed by simplehash */
+ char status;
+} GUCHashEntry;
/*
* In addition to the hash table, variables having certain properties are
@@ -227,8 +228,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int guc_name_match(const void *key1, const void *key2, Size keysize);
+static uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -265,6 +265,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+#define SH_PREFIX GUCHash
+#define SH_ELEMENT_TYPE GUCHashEntry
+#define SH_KEY_TYPE const char *
+#define SH_KEY gucname
+#define SH_HASH_KEY(tb, key) guc_name_hash(key)
+#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL; /* entries are GUCHashEntrys */
/*
* This function handles both actual config file (re)loads and execution of
@@ -282,7 +294,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
ConfigVariable *item,
*head,
*tail;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/* Parse the main config file into a list of option names and values */
@@ -358,8 +370,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* need this so that we can tell below which ones have been removed from
* the file since we last processed it.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
@@ -445,8 +457,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* boot-time defaults. If such a variable can't be changed after startup,
* report that and continue.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
GucStack *stack;
@@ -867,17 +879,17 @@ struct config_generic **
get_guc_variables(int *num_vars)
{
struct config_generic **result;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
int i;
- *num_vars = hash_get_num_entries(guc_hashtab);
+ *num_vars = guc_hashtab->members;
result = palloc(sizeof(struct config_generic *) * *num_vars);
/* Extract pointers from the hash table */
i = 0;
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
result[i++] = hentry->gucvar;
Assert(i == *num_vars);
@@ -899,7 +911,6 @@ build_guc_variables(void)
{
int size_vars;
int num_vars = 0;
- HASHCTL hash_ctl;
GUCHashEntry *hentry;
bool found;
int i;
@@ -961,24 +972,14 @@ build_guc_variables(void)
*/
size_vars = num_vars + num_vars / 4;
- hash_ctl.keysize = sizeof(char *);
- hash_ctl.entrysize = sizeof(GUCHashEntry);
- hash_ctl.hash = guc_name_hash;
- hash_ctl.match = guc_name_match;
- hash_ctl.hcxt = GUCMemoryContext;
- guc_hashtab = hash_create("GUC hash table",
- size_vars,
- &hash_ctl,
- HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+ guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
for (i = 0; ConfigureNamesBool[i].gen.name; i++)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -987,10 +988,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -999,10 +998,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1011,10 +1008,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1023,15 +1018,13 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
- Assert(num_vars == hash_get_num_entries(guc_hashtab));
+ Assert(num_vars == guc_hashtab->members);
}
/*
@@ -1044,10 +1037,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &var->name,
- HASH_ENTER_NULL,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
if (unlikely(hentry == NULL))
{
ereport(elevel,
@@ -1236,10 +1227,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
Assert(name);
/* Look it up using the hash table. */
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry)
return hentry->gucvar;
@@ -1322,10 +1311,9 @@ guc_name_compare(const char *namea, const char *nameb)
* Hash function that's compatible with guc_name_compare
*/
static uint32
-guc_name_hash(const void *key, Size keysize)
+guc_name_hash(const char *name)
{
uint32 result = 0;
- const char *name = *(const char *const *) key;
while (*name)
{
@@ -1342,19 +1330,6 @@ guc_name_hash(const void *key, Size keysize)
return result;
}
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
- const char *name1 = *(const char *const *) key1;
- const char *name2 = *(const char *const *) key2;
-
- return guc_name_compare(name1, name2);
-}
-
-
/*
* Convert a GUC name to the form that should be used in pg_parameter_acl.
*
@@ -1524,7 +1499,7 @@ check_GUC_init(struct config_generic *gconf)
void
InitializeGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -1542,8 +1517,8 @@ InitializeGUCOptions(void)
* Load all variables with their compiled-in defaults, and initialize
* status fields as needed.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
/* Check mapping between initial and default value */
Assert(check_GUC_init(hentry->gucvar));
@@ -2528,7 +2503,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
void
BeginReportingGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -2552,8 +2527,8 @@ BeginReportingGUCOptions(void)
PGC_INTERNAL, PGC_S_OVERRIDE);
/* Transmit initial values of interesting variables */
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *conf = hentry->gucvar;
@@ -4809,10 +4784,8 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry == NULL)
{
/*
@@ -5148,7 +5121,7 @@ void
MarkGUCPrefixReserved(const char *className)
{
int classLen = strlen(className);
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
MemoryContext oldcontext;
@@ -5158,8 +5131,8 @@ MarkGUCPrefixReserved(const char *className)
* don't bother trying to free associated memory, since this shouldn't
* happen often.)
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *var = hentry->gucvar;
@@ -5174,10 +5147,8 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- hash_search(guc_hashtab,
- &var->name,
- HASH_REMOVE,
- NULL);
+ GUCHash_delete(guc_hashtab, var->name);
+
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
}
@@ -5208,7 +5179,7 @@ get_explain_guc_options(int *num)
* While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
* it doesn't seem worth dynamically resizing this array.
*/
- result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+ result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
/* We need only consider GUCs with source not PGC_S_DEFAULT */
dlist_foreach(iter, &guc_nondef_list)
--
2.34.1
On Fri, Nov 17, 2023 at 11:02 AM Jeff Davis <pgsql@j-davis.com> wrote:
I had briefly experimented changing the hash table in guc.c to use
simplehash. It didn't offer any measurable speedup, but the API is
slightly nicer.
I thought I'd post the patch in case others thought this was a good
direction or nice cleanup.
This is not a comment on the patch itself, but since GUC operations
are not typically considered performance or space sensitive, this
comment from simplehash.h makes a case against it:
 * It's probably not worthwhile to generate such a specialized
 * implementation for hash tables that aren't performance or space
 * sensitive.
But your argument of a nicer API might make a case for the patch.
Best regards,
Gurjeet
http://Gurje.et
On Fri, 2023-11-17 at 13:22 -0800, Gurjeet Singh wrote:
This is not a comment on the patch itself, but since GUC operations
are not typically considered performance or space sensitive,
A "SET search_path" clause on a CREATE FUNCTION is a case for better
performance in guc.c, because it repeatedly sets and rolls back the
setting on each function invocation.
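For instance, any function declared with a SET clause behaves this way
(a sketch; inc_ab is just an illustrative name, echoing the benchmark
function used later in this thread):

CREATE FUNCTION inc_ab(x int) RETURNS int
LANGUAGE sql
SET search_path = a, b
AS 'SELECT x + 1';

Each call enters a new GUC nest level, applies search_path, and rolls it
back at exit, so the guc.c hash lookup runs on every invocation.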
Unfortunately, this patch doesn't really improve the performance. The
reason the hash table in guc.c is slow is because of the case folding
in both hashing and comparison. I might get around to fixing that,
which could have a minor impact, and perhaps then the choice between
hsearch/simplehash would matter.
this comment from simplehash.h makes a case against it:
 * It's probably not worthwhile to generate such a specialized
 * implementation for hash tables that aren't performance or space
 * sensitive.
But your argument of a nicer API might make a case for the patch.
Yeah, that's what I was thinking. simplehash is newer and has a nicer
API, so if we like it and want to move more code over, this is one
step. But if we are fine using both hsearch.h and simplehash.h for
overlapping use cases indefinitely, then I'll drop this.
Regards,
Jeff Davis
Jeff Davis <pgsql@j-davis.com> writes:
On Fri, 2023-11-17 at 13:22 -0800, Gurjeet Singh wrote:
But your argument of a nicer API might make a case for the patch.
Yeah, that's what I was thinking. simplehash is newer and has a nicer
API, so if we like it and want to move more code over, this is one
step. But if we are fine using both hsearch.h and simplehash.h for
overlapping use cases indefinitely, then I'll drop this.
I can't imagine wanting to convert *every* hashtable in the system
to simplehash; the added code bloat would be unreasonable. So yeah,
I think we'll have two mechanisms indefinitely. That's not to say
that we might not rewrite hsearch. But simplehash was never meant
to be a universal solution.
regards, tom lane
Hi,
On 2023-11-17 13:44:21 -0800, Jeff Davis wrote:
On Fri, 2023-11-17 at 13:22 -0800, Gurjeet Singh wrote:
This is not a comment on the patch itself, but since GUC operations
are not typically considered performance or space sensitive,
I don't think that's quite right - we have a lot of GUCs and they're loaded in
each connection. And there's set/reset around transactions etc. So even
without search path stuff that Jeff mentioned, it could be worth optimizing
this.
Yeah, that's what I was thinking. simplehash is newer and has a nicer
API, so if we like it and want to move more code over, this is one
step. But if we are fine using both hsearch.h and simplehash.h for
overlapping use cases indefinitely, then I'll drop this.
Right now there are use cases where simplehash isn't really usable (if stable
pointers to hash elements are needed and/or the entries are very large). I've
been wondering about providing a layer on top of simplehash, or an option
to simplehash, that provides those. That could perhaps also implement
runtime-defined key sizes.
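A minimal sketch of that indirection idea (all names here are invented;
hash_bytes() is from common/hashfn.h). simplehash only ever moves the
small pointer-sized elements, so the real entries keep stable addresses:

typedef struct BigEntry
{
    const char *name;           /* key */
    /* ... large, address-sensitive payload ... */
} BigEntry;

typedef struct PtrEntry
{
    BigEntry   *ent;            /* separately allocated, never moved */
    char        status;         /* needed by simplehash */
} PtrEntry;

#define SH_PREFIX ptrhash
#define SH_ELEMENT_TYPE PtrEntry
#define SH_KEY_TYPE BigEntry *
#define SH_KEY ent
#define SH_HASH_KEY(tb, key) \
    hash_bytes((const unsigned char *) (key)->name, strlen((key)->name))
#define SH_EQUAL(tb, a, b) (strcmp((a)->name, (b)->name) == 0)
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"

Lookups would then pass a dummy BigEntry carrying only the key name.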
I think this would be a completely fair thing to port over - whether it's
worth it I don't quite know, but I'd not be against it on principle or such.
Greetings,
Andres Freund
On Fri, 2023-11-17 at 17:04 -0500, Tom Lane wrote:
I can't imagine wanting to convert *every* hashtable in the system
to simplehash; the added code bloat would be unreasonable. So yeah,
I think we'll have two mechanisms indefinitely. That's not to say
that we might not rewrite hsearch. But simplehash was never meant
to be a universal solution.
OK, I will withdraw the patch until/unless it provides a concrete
benefit.
Regards,
Jeff Davis
Hi,
On 2023-11-17 17:04:04 -0500, Tom Lane wrote:
Jeff Davis <pgsql@j-davis.com> writes:
On Fri, 2023-11-17 at 13:22 -0800, Gurjeet Singh wrote:
But your argument of a nicer API might make a case for the patch.
Yeah, that's what I was thinking. simplehash is newer and has a nicer
API, so if we like it and want to move more code over, this is one
step. But if we are fine using both hsearch.h and simplehash.h for
overlapping use cases indefinitely, then I'll drop this.
I can't imagine wanting to convert *every* hashtable in the system
to simplehash; the added code bloat would be unreasonable.
Yea. And it's also just not suitable for everything. Stable pointers can be
very useful and some places have entries that are too large to be moved during
collisions. Chained hashtables have their place.
So yeah, I think we'll have two mechanisms indefinitely. That's not to say
that we might not rewrite hsearch.
We probably should. It's awkward to use, the code is very hard to follow, and
it's really not very fast. Part of that is due to serving too many masters.
I doubt it's a good idea to use the same code for highly contended, partitioned,
shared memory hashtables and many tiny local memory hashtables. The design
goals are just very different.
Greetings,
Andres Freund
On Fri, 2023-11-17 at 14:08 -0800, Andres Freund wrote:
I think this would be a completely fair thing to port over - whether
it's worth it I don't quite know, but I'd not be against it on
principle or such.
Right now I don't think it offers much. I'll see if I can solve the
case-folding slowness first, and then maybe it will be measurable.
Regards,
Jeff Davis
Hi,
On 2023-11-17 14:08:56 -0800, Jeff Davis wrote:
On Fri, 2023-11-17 at 17:04 -0500, Tom Lane wrote:
I can't imagine wanting to convert *every* hashtable in the system
to simplehash; the added code bloat would be unreasonable. So yeah,
I think we'll have two mechanisms indefinitely. That's not to say
that we might not rewrite hsearch. But simplehash was never meant
to be a universal solution.
OK, I will withdraw the patch until/unless it provides a concrete
benefit.
It might already provide one in the space domain:
SELECT count(*), sum(total_bytes) total_bytes, sum(total_nblocks) total_nblocks, sum(free_bytes) free_bytes, sum(free_chunks) free_chunks, sum(used_bytes) used_bytes
FROM pg_backend_memory_contexts
WHERE name LIKE 'GUC%';
HEAD:
┌───────┬─────────────┬───────────────┬────────────┬─────────────┬────────────┐
│ count │ total_bytes │ total_nblocks │ free_bytes │ free_chunks │ used_bytes │
├───────┼─────────────┼───────────────┼────────────┼─────────────┼────────────┤
│ 2 │ 57344 │ 5 │ 25032 │ 10 │ 32312 │
└───────┴─────────────┴───────────────┴────────────┴─────────────┴────────────┘
your patch:
┌───────┬─────────────┬───────────────┬────────────┬─────────────┬────────────┐
│ count │ total_bytes │ total_nblocks │ free_bytes │ free_chunks │ used_bytes │
├───────┼─────────────┼───────────────┼────────────┼─────────────┼────────────┤
│ 1 │ 36928 │ 3 │ 12360 │ 3 │ 24568 │
└───────┴─────────────┴───────────────┴────────────┴─────────────┴────────────┘
However, it fares less well at larger numbers of GUCs, performance-wise. At
first I thought that that's largely because you aren't using SH_STORE_HASH.
With that, it's slower when creating a large number of GUCs, but a good bit
faster retrieving them. But that slowness didn't seem right.
Then I noticed that memory usage was too large when creating many GUCs - a bit
of debugging later, I figured out that that's due to guc_name_hash() being
terrifyingly bad. There's no bit mixing whatsoever! Which leads to very large
numbers of hash conflicts - which simplehash tries to defend against a bit by
making the table larger.
(gdb) p guc_name_hash("andres.c2")
$14 = 3798554171
(gdb) p guc_name_hash("andres.c3")
$15 = 3798554170
Fixing that makes simplehash always faster, but still doesn't win on memory
usage at the upper end - the two pointers in GUCHashEntry make it too big.
I think, independent of this patch, it might be worth requiring that hash
table lookups apply the transformation before the lookup. A comparison
function this expensive is not great...
Greetings,
Andres Freund
On Fri, 2023-11-17 at 15:27 -0800, Andres Freund wrote:
At first I thought that that's largely because you aren't using
SH_STORE_HASH.
I might want to use that in the search_path cache, then. The lookup
wasn't showing up much in the profile the last I checked, but I'll take
a second look.
Then I noticed that memory usage was too large when creating many
GUCs - a bit of debugging later, I figured out that that's due to
guc_name_hash() being terrifyingly bad. There's no bit mixing
whatsoever!
Wow.
It seems like hash_combine() could be more widely used in other places,
too? Here it seems like a worse problem because strings really need
mixing, and maybe ExecHashGetHashValue doesn't. But it seems easier to
use hash_combine() everywhere so that we don't have to think about
strange cases.
I think, independent of this patch, it might be worth requiring that
hash table lookups apply the transformation before the lookup. A
comparison function this expensive is not great...
The requested name is already case-folded in most contexts. We can do a
lookup first, and if that fails, case-fold and try again. I'll hack up
a patch -- I believe that would be measurable for the proconfigs.
Regards,
Jeff Davis
Hi,
On 2023-11-17 16:01:31 -0800, Jeff Davis wrote:
On Fri, 2023-11-17 at 15:27 -0800, Andres Freund wrote:
At first I thought that that's largely because you aren't using
SH_STORE_HASH.
I might want to use that in the search_path cache, then. The lookup
wasn't showing up much in the profile the last I checked, but I'll take
a second look.
It also matters for insertions, fwiw.
Then I noticed that memory usage was too large when creating many
GUCs - a bit of debugging later, I figured out that that's due to
guc_name_hash() being terrifyingly bad. There's no bit mixing
whatsoever!
Wow.
It seems like hash_combine() could be more widely used in other places,
too?
I don't think hash_combine() alone helps that much - you need to actually use
a hash function for the values you are combining. Using a character value
alone as a 32-bit hash value unsurprisingly leads to a very poor
distribution of bits set in hash values.
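A minimal sketch of that distinction (name_hash_mixed is an invented
name; hash_combine() and murmurhash32() are in common/hashfn.h):

#include "common/hashfn.h"

static uint32
name_hash_mixed(const char *name)
{
    uint32      result = 0;

    while (*name)
    {
        char        ch = *name++;

        /* case-fold in the same way as guc_name_compare */
        if (ch >= 'A' && ch <= 'Z')
            ch += 'a' - 'A';

        /* hash each input value before combining it */
        result = hash_combine(result, murmurhash32((uint32) ch));
    }
    return result;
}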
Here it seems like a worse problem because strings really need
mixing, and maybe ExecHashGetHashValue doesn't. But it seems easier to
use hash_combine() everywhere so that we don't have to think about
strange cases.
Yea.
I think, independent of this patch, it might be worth requiring that
hash table lookups apply the transformation before the lookup. A
comparison function this expensive is not great...
The requested name is already case-folded in most contexts. We can do a
lookup first, and if that fails, case-fold and try again. I'll hack up
a patch -- I believe that would be measurable for the proconfigs.
I'd just always case fold before lookups. The expensive bit of the case
folding imo is that you need to do awkward things during hash lookups.
Greetings,
Andres Freund
Hi,
On Fri, 2023-11-17 at 16:10 -0800, Andres Freund wrote:
The requested name is already case-folded in most contexts. We can
do a lookup first, and if that fails, case-fold and try again. I'll
hack up a patch -- I believe that would be measurable for the
proconfigs.
I'd just always case fold before lookups. The expensive bit of the
case folding imo is that you need to do awkward things during hash
lookups.
Attached are a bunch of tiny patches and some perf numbers based on
the simple test described here:
/messages/by-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel@j-davis.com
0001: Use simplehash (without SH_STORE_HASH)
0002: fold before lookups
0003: have gen->name_key alias gen->name in the typical case where the
name is already folded, saving allocations.
0004: second-chance lookup in hash table (avoids case-folding for
already-folded names)
0005: Use SH_STORE_HASH
(These are split out into tiny patches for perf measurement, some are
pretty obvious but I wanted to see the impact, if any.)
Numbers below are cumulative (i.e. 0003 includes 0002 and 0001):
master: 7899ms
0001: 7850
0002: 7958
0003: 7942
0004: 7549
0005: 7411
I'm inclined toward all of these patches. I'll also look at adding
SH_STORE_HASH for the search_path cache.
Looks like we're on track to bring the overhead of SET search_path down
to reasonable levels. Thank you!
Regards,
Jeff Davis
Attachments:
v3-0005-Use-SH_STORE_HASH-for-GUC-hash-table.patch (text/x-patch)
From 712c42f106d6a362ad201b56881c707af4412dc7 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Sun, 19 Nov 2023 14:24:31 -0800
Subject: [PATCH v3 5/5] Use SH_STORE_HASH for GUC hash table.
---
src/backend/utils/misc/guc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index bcadbface6..1102ea9954 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -205,6 +205,7 @@ typedef struct
struct config_generic *gucvar; /* -> GUC's defining structure */
/* needed by simplehash */
+ uint32 hash;
char status;
} GUCHashEntry;
@@ -276,6 +277,8 @@ static char * guc_name_key(int elevel, const char *name);
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_DECLARE
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_STORE_HASH
#define SH_DEFINE
#include "lib/simplehash.h"
--
2.34.1
v3-0004-GUC-optimize-for-already-case-folded-names.patch (text/x-patch)
From 5ba89f4bdfc6b1056d6e7b78a5577c711cebd0af Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Sun, 19 Nov 2023 14:00:23 -0800
Subject: [PATCH v3 4/5] GUC: optimize for already case-folded names.
---
src/backend/utils/misc/guc.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5e3b95e3f2..bcadbface6 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1310,17 +1310,24 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
{
GUCHashEntry *hentry;
int i;
- char *name_key;
Assert(name);
- /* Look it up using the hash table. */
- name_key = guc_name_key(elevel, name);
- if (name_key == NULL)
- return NULL;
- hentry = GUCHash_lookup(guc_hashtab, name_key);
- guc_free(name_key);
- name_key = NULL;
+ /*
+ * Look it up using the hash table without case-folding first, as an
+ * optimization.
+ */
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
+ /* Try again with case folding. */
+ if (!hentry)
+ {
+ char *name_key = guc_name_key(elevel, name);
+ if (name_key == NULL)
+ return NULL;
+ hentry = GUCHash_lookup(guc_hashtab, name_key);
+ guc_free(name_key);
+ }
if (hentry)
return hentry->gucvar;
--
2.34.1
v3-0003-Avoid-duplicating-GUC-name-when-it-s-already-case.patch (text/x-patch)
From 485dd56a0a2a50f725fcc9af2d370bfa4a197c8b Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Sun, 19 Nov 2023 13:41:49 -0800
Subject: [PATCH v3 3/5] Avoid duplicating GUC name when it's already
case-folded.
---
src/backend/utils/misc/guc.c | 70 ++++++++++++++++++++++++++++------
src/include/utils/guc_tables.h | 3 +-
2 files changed, 60 insertions(+), 13 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b51d10dbc0..5e3b95e3f2 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -265,6 +265,7 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
void **extra, GucSource source, int elevel);
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+static bool guc_is_name_key(const char *name);
static char * guc_name_key(int elevel, const char *name);
#define SH_PREFIX GUCHash
@@ -903,6 +904,22 @@ get_guc_variables(int *num_vars)
}
+/*
+ * Check if name is already case-folded.
+ */
+static bool
+guc_is_name_key(const char *name)
+{
+ for(const char *p = name; *p; p++)
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ return false;
+ }
+
+ return true;
+}
+
+
/*
* Convert to key by case folding.
*/
@@ -953,7 +970,11 @@ build_guc_variables(void)
/* Rather than requiring vartype to be filled in by hand, do this: */
conf->gen.vartype = PGC_BOOL;
- conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+ if (guc_is_name_key(conf->gen.name))
+ conf->gen.name_key = conf->gen.name;
+ else
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+
num_vars++;
}
@@ -963,7 +984,11 @@ build_guc_variables(void)
conf->gen.vartype = PGC_INT;
- conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+ if (guc_is_name_key(conf->gen.name))
+ conf->gen.name_key = conf->gen.name;
+ else
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+
num_vars++;
}
@@ -973,7 +998,11 @@ build_guc_variables(void)
conf->gen.vartype = PGC_REAL;
- conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+ if (guc_is_name_key(conf->gen.name))
+ conf->gen.name_key = conf->gen.name;
+ else
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+
num_vars++;
}
@@ -983,7 +1012,11 @@ build_guc_variables(void)
conf->gen.vartype = PGC_STRING;
- conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+ if (guc_is_name_key(conf->gen.name))
+ conf->gen.name_key = conf->gen.name;
+ else
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+
num_vars++;
}
@@ -993,7 +1026,11 @@ build_guc_variables(void)
conf->gen.vartype = PGC_ENUM;
- conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+ if (guc_is_name_key(conf->gen.name))
+ conf->gen.name_key = conf->gen.name;
+ else
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
+
num_vars++;
}
@@ -1212,13 +1249,18 @@ add_placeholder_variable(const char *constname, int elevel)
return NULL;
}
- name_key = guc_name_key(elevel, name);
- if (name_key == NULL)
+ if (!guc_is_name_key(name))
{
- guc_free(name);
- guc_free(var);
- return NULL;
+ name_key = guc_name_key(elevel, name);
+ if (name_key == NULL)
+ {
+ guc_free(name);
+ guc_free(var);
+ return NULL;
+ }
}
+ else
+ name_key = name;
gen->name = name;
gen->context = PGC_USERSET;
@@ -1237,7 +1279,8 @@ add_placeholder_variable(const char *constname, int elevel)
if (!add_guc_variable((struct config_generic *) var, elevel))
{
- guc_free(name_key);
+ if (name_key != name)
+ guc_free(name_key);
guc_free(name);
guc_free(var);
return NULL;
@@ -4796,7 +4839,10 @@ init_custom_variable(const char *name,
memset(gen, 0, sz);
gen->name = guc_strdup(ERROR, name);
- gen->name_key = guc_name_key(ERROR, name);
+ if (guc_is_name_key(name))
+ gen->name_key = name;
+ else
+ gen->name_key = guc_name_key(ERROR, name);
gen->context = context;
gen->group = CUSTOM_OPTIONS;
gen->short_desc = short_desc;
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 41b0316c0b..26bf7e0978 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -160,7 +160,8 @@ struct config_generic
int flags; /* flag bits, see guc.h */
/* variable fields, initialized at runtime: */
enum config_type vartype; /* type of variable (set only at startup) */
- const char *name_key; /* name folded to lower case */
+ const char *name_key; /* name folded to lower case; alias of name if
+ * equal */
int status; /* status bits, see below */
GucSource source; /* source of the current actual value */
GucSource reset_source; /* source of the reset_value */
--
2.34.1
v3-0002-Case-fold-earlier.patch (text/x-patch)
From e896e326688bef27f1f612ec64c5a0ce8fc8d6b3 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 19 Oct 2023 23:42:58 -0700
Subject: [PATCH v3 2/5] Case fold earlier.
---
src/backend/utils/misc/guc.c | 102 +++++++++++++++++++++++----------
src/include/utils/guc_tables.h | 1 +
2 files changed, 72 insertions(+), 31 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 7295b0f00e..b51d10dbc0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -200,7 +201,7 @@ static MemoryContext GUCMemoryContext;
*/
typedef struct
{
- const char *gucname; /* hash key */
+ const char *guc_hashkey; /* case-folded GUC name */
struct config_generic *gucvar; /* -> GUC's defining structure */
/* needed by simplehash */
@@ -264,13 +265,14 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
void **extra, GucSource source, int elevel);
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+static char * guc_name_key(int elevel, const char *name);
#define SH_PREFIX GUCHash
#define SH_ELEMENT_TYPE GUCHashEntry
#define SH_KEY_TYPE const char *
-#define SH_KEY gucname
+#define SH_KEY guc_hashkey
#define SH_HASH_KEY(tb, key) guc_name_hash(key)
-#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
@@ -901,6 +903,24 @@ get_guc_variables(int *num_vars)
}
+/*
+ * Convert to key by case folding.
+ */
+static char *
+guc_name_key(int elevel, const char *name)
+{
+ char *newstr = guc_strdup(elevel, name);
+
+ for(char *p = newstr; *p; p++)
+ {
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+ }
+
+ return newstr;
+}
+
+
/*
* Build the GUC hash table. This is split out so that help_config.c can
* extract all the variables without running all of InitializeGUCOptions.
@@ -932,6 +952,8 @@ build_guc_variables(void)
/* Rather than requiring vartype to be filled in by hand, do this: */
conf->gen.vartype = PGC_BOOL;
+
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
num_vars++;
}
@@ -940,6 +962,8 @@ build_guc_variables(void)
struct config_int *conf = &ConfigureNamesInt[i];
conf->gen.vartype = PGC_INT;
+
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
num_vars++;
}
@@ -948,6 +972,8 @@ build_guc_variables(void)
struct config_real *conf = &ConfigureNamesReal[i];
conf->gen.vartype = PGC_REAL;
+
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
num_vars++;
}
@@ -956,6 +982,8 @@ build_guc_variables(void)
struct config_string *conf = &ConfigureNamesString[i];
conf->gen.vartype = PGC_STRING;
+
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
num_vars++;
}
@@ -964,6 +992,8 @@ build_guc_variables(void)
struct config_enum *conf = &ConfigureNamesEnum[i];
conf->gen.vartype = PGC_ENUM;
+
+ conf->gen.name_key = guc_name_key(ERROR, conf->gen.name);
num_vars++;
}
@@ -978,7 +1008,7 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name_key, &found);
Assert(!found);
hentry->gucvar = gucvar;
@@ -988,7 +1018,7 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name_key, &found);
Assert(!found);
hentry->gucvar = gucvar;
@@ -998,7 +1028,7 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name_key, &found);
Assert(!found);
hentry->gucvar = gucvar;
@@ -1008,7 +1038,7 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name_key, &found);
Assert(!found);
hentry->gucvar = gucvar;
@@ -1018,7 +1048,7 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name_key, &found);
Assert(!found);
hentry->gucvar = gucvar;
@@ -1037,7 +1067,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+ Assert(var->name_key);
+ hentry = GUCHash_insert(guc_hashtab, var->name_key, &found);
if (unlikely(hentry == NULL))
{
@@ -1160,11 +1191,13 @@ assignable_custom_variable_name(const char *name, bool skip_errors, int elevel)
* Create and add a placeholder variable for a custom variable name.
*/
static struct config_generic *
-add_placeholder_variable(const char *name, int elevel)
+add_placeholder_variable(const char *constname, int elevel)
{
size_t sz = sizeof(struct config_string) + sizeof(char *);
struct config_string *var;
struct config_generic *gen;
+ char *name;
+ char *name_key;
var = (struct config_string *) guc_malloc(elevel, sz);
if (var == NULL)
@@ -1172,18 +1205,28 @@ add_placeholder_variable(const char *name, int elevel)
memset(var, 0, sz);
gen = &var->gen;
- gen->name = guc_strdup(elevel, name);
- if (gen->name == NULL)
+ name = guc_strdup(elevel, constname);
+ if (name == NULL)
{
guc_free(var);
return NULL;
}
+ name_key = guc_name_key(elevel, name);
+ if (name_key == NULL)
+ {
+ guc_free(name);
+ guc_free(var);
+ return NULL;
+ }
+
+ gen->name = name;
gen->context = PGC_USERSET;
gen->group = CUSTOM_OPTIONS;
gen->short_desc = "GUC placeholder variable";
gen->flags = GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_CUSTOM_PLACEHOLDER;
gen->vartype = PGC_STRING;
+ gen->name_key = name_key;
/*
* The char* is allocated at the end of the struct since we have no
@@ -1194,7 +1237,8 @@ add_placeholder_variable(const char *name, int elevel)
if (!add_guc_variable((struct config_generic *) var, elevel))
{
- guc_free(unconstify(char *, gen->name));
+ guc_free(name_key);
+ guc_free(name);
guc_free(var);
return NULL;
}
@@ -1223,11 +1267,17 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
{
GUCHashEntry *hentry;
int i;
+ char *name_key;
Assert(name);
/* Look it up using the hash table. */
- hentry = GUCHash_lookup(guc_hashtab, name);
+ name_key = guc_name_key(elevel, name);
+ if (name_key == NULL)
+ return NULL;
+ hentry = GUCHash_lookup(guc_hashtab, name_key);
+ guc_free(name_key);
+ name_key = NULL;
if (hentry)
return hentry->gucvar;
@@ -1313,21 +1363,10 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const char *name)
{
- uint32 result = 0;
+ const unsigned char *bytes = (const unsigned char *)name;
+ int blen = strlen(name);
- while (*name)
- {
- char ch = *name++;
-
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
-
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
- }
- return result;
+ return hash_bytes(bytes, blen);
}
/*
@@ -4757,6 +4796,7 @@ init_custom_variable(const char *name,
memset(gen, 0, sz);
gen->name = guc_strdup(ERROR, name);
+ gen->name_key = guc_name_key(ERROR, name);
gen->context = context;
gen->group = CUSTOM_OPTIONS;
gen->short_desc = short_desc;
@@ -4784,7 +4824,7 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = GUCHash_lookup(guc_hashtab, name);
+ hentry = GUCHash_lookup(guc_hashtab, variable->name_key);
if (hentry == NULL)
{
@@ -4819,7 +4859,7 @@ define_custom_variable(struct config_generic *variable)
* Replace the placeholder in the hash table. We aren't changing the name
* (at least up to case-folding), so the hash value is unchanged.
*/
- hentry->gucname = name;
+ hentry->guc_hashkey = variable->name_key;
hentry->gucvar = variable;
/*
@@ -5147,7 +5187,7 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- GUCHash_delete(guc_hashtab, var->name);
+ GUCHash_delete(guc_hashtab, var->name_key);
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 0c38255961..41b0316c0b 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -160,6 +160,7 @@ struct config_generic
int flags; /* flag bits, see guc.h */
/* variable fields, initialized at runtime: */
enum config_type vartype; /* type of variable (set only at startup) */
+ const char *name_key; /* name folded to lower case */
int status; /* status bits, see below */
GucSource source; /* source of the current actual value */
GucSource reset_source; /* source of the reset_value */
--
2.34.1
v3-0001-Convert-GUC-hashtable-to-use-simplehash.patch (text/x-patch)
From b1dc4bd1a732fb6572a0e4d5a966709ca30ddf79 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 2 Aug 2023 23:04:06 -0700
Subject: [PATCH v3 1/5] Convert GUC hashtable to use simplehash.
---
src/backend/utils/misc/guc.c | 141 ++++++++++++++---------------------
1 file changed, 56 insertions(+), 85 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 82d8efbc96..7295b0f00e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -202,9 +202,10 @@ typedef struct
{
const char *gucname; /* hash key */
struct config_generic *gucvar; /* -> GUC's defining structure */
-} GUCHashEntry;
-static HTAB *guc_hashtab; /* entries are GUCHashEntrys */
+ /* needed by simplehash */
+ char status;
+} GUCHashEntry;
/*
* In addition to the hash table, variables having certain properties are
@@ -227,8 +228,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int guc_name_match(const void *key1, const void *key2, Size keysize);
+static uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -265,6 +265,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+#define SH_PREFIX GUCHash
+#define SH_ELEMENT_TYPE GUCHashEntry
+#define SH_KEY_TYPE const char *
+#define SH_KEY gucname
+#define SH_HASH_KEY(tb, key) guc_name_hash(key)
+#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL; /* entries are GUCHashEntrys */
/*
* This function handles both actual config file (re)loads and execution of
@@ -282,7 +294,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
ConfigVariable *item,
*head,
*tail;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/* Parse the main config file into a list of option names and values */
@@ -358,8 +370,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* need this so that we can tell below which ones have been removed from
* the file since we last processed it.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
@@ -445,8 +457,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* boot-time defaults. If such a variable can't be changed after startup,
* report that and continue.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
GucStack *stack;
@@ -867,17 +879,17 @@ struct config_generic **
get_guc_variables(int *num_vars)
{
struct config_generic **result;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
int i;
- *num_vars = hash_get_num_entries(guc_hashtab);
+ *num_vars = guc_hashtab->members;
result = palloc(sizeof(struct config_generic *) * *num_vars);
/* Extract pointers from the hash table */
i = 0;
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
result[i++] = hentry->gucvar;
Assert(i == *num_vars);
@@ -899,7 +911,6 @@ build_guc_variables(void)
{
int size_vars;
int num_vars = 0;
- HASHCTL hash_ctl;
GUCHashEntry *hentry;
bool found;
int i;
@@ -961,24 +972,14 @@ build_guc_variables(void)
*/
size_vars = num_vars + num_vars / 4;
- hash_ctl.keysize = sizeof(char *);
- hash_ctl.entrysize = sizeof(GUCHashEntry);
- hash_ctl.hash = guc_name_hash;
- hash_ctl.match = guc_name_match;
- hash_ctl.hcxt = GUCMemoryContext;
- guc_hashtab = hash_create("GUC hash table",
- size_vars,
- &hash_ctl,
- HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+ guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
for (i = 0; ConfigureNamesBool[i].gen.name; i++)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -987,10 +988,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -999,10 +998,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1011,10 +1008,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1023,15 +1018,13 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
- Assert(num_vars == hash_get_num_entries(guc_hashtab));
+ Assert(num_vars == guc_hashtab->members);
}
/*
@@ -1044,10 +1037,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &var->name,
- HASH_ENTER_NULL,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
if (unlikely(hentry == NULL))
{
ereport(elevel,
@@ -1236,10 +1227,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
Assert(name);
/* Look it up using the hash table. */
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry)
return hentry->gucvar;
@@ -1322,10 +1311,9 @@ guc_name_compare(const char *namea, const char *nameb)
* Hash function that's compatible with guc_name_compare
*/
static uint32
-guc_name_hash(const void *key, Size keysize)
+guc_name_hash(const char *name)
{
uint32 result = 0;
- const char *name = *(const char *const *) key;
while (*name)
{
@@ -1342,19 +1330,6 @@ guc_name_hash(const void *key, Size keysize)
return result;
}
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
- const char *name1 = *(const char *const *) key1;
- const char *name2 = *(const char *const *) key2;
-
- return guc_name_compare(name1, name2);
-}
-
-
/*
* Convert a GUC name to the form that should be used in pg_parameter_acl.
*
@@ -1524,7 +1499,7 @@ check_GUC_init(struct config_generic *gconf)
void
InitializeGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -1542,8 +1517,8 @@ InitializeGUCOptions(void)
* Load all variables with their compiled-in defaults, and initialize
* status fields as needed.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
/* Check mapping between initial and default value */
Assert(check_GUC_init(hentry->gucvar));
@@ -2528,7 +2503,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
void
BeginReportingGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -2552,8 +2527,8 @@ BeginReportingGUCOptions(void)
PGC_INTERNAL, PGC_S_OVERRIDE);
/* Transmit initial values of interesting variables */
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *conf = hentry->gucvar;
@@ -4809,10 +4784,8 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry == NULL)
{
/*
@@ -5148,7 +5121,7 @@ void
MarkGUCPrefixReserved(const char *className)
{
int classLen = strlen(className);
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
MemoryContext oldcontext;
@@ -5158,8 +5131,8 @@ MarkGUCPrefixReserved(const char *className)
* don't bother trying to free associated memory, since this shouldn't
* happen often.)
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *var = hentry->gucvar;
@@ -5174,10 +5147,8 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- hash_search(guc_hashtab,
- &var->name,
- HASH_REMOVE,
- NULL);
+ GUCHash_delete(guc_hashtab, var->name);
+
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
}
@@ -5208,7 +5179,7 @@ get_explain_guc_options(int *num)
* While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
* it doesn't seem worth dynamically resizing this array.
*/
- result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+ result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
/* We need only consider GUCs with source not PGC_S_DEFAULT */
dlist_foreach(iter, &guc_nondef_list)
--
2.34.1
On Mon, Nov 20, 2023 at 5:54 AM Jeff Davis <pgsql@j-davis.com> wrote:
Attached are a bunch of tiny patches and some perf numbers based on
the simple test described here:
/messages/by-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel@j-davis.com
I tried taking I/O out, like this, thinking the times would be less variable:
cat bench.sql
select 1 from generate_series(1,500000) x(x),
    lateral (SELECT inc_ab(x)) a offset 10000000;
(with turbo off)
pgbench -n -T 30 -f bench.sql -M prepared
master:
latency average = 643.625 ms
0001-0005:
latency average = 607.354 ms
...about 5.5% less time, similar to what Jeff found.
I get a noticeable regression in 0002, though, and I think I see why:
guc_name_hash(const char *name)
{
- uint32 result = 0;
+ const unsigned char *bytes = (const unsigned char *)name;
+ int blen = strlen(name);
The strlen call required for hash_bytes() is not free. The lack of
mixing in the (probably inlined after 0001) previous hash function can
be remedied directly, as in the attached:
0001-0002 only:
latency average = 670.059 ms
0001-0002, plus revert hashbytes, add finalizer:
latency average = 656.810 ms
-#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
Likewise, I suspect calling out to the C library is going to throw
away some of the gains that were won by not needing to downcase all
the time, but I haven't dug deeper.
Attachments:
0002-ADDENDUM-add-finalizer-to-guc-name-hash.patch.txt (text/plain)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b51d10dbc0..124b8fbe85 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1363,10 +1363,17 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const char *name)
{
- const unsigned char *bytes = (const unsigned char *)name;
- int blen = strlen(name);
+ uint32 result = 0;
- return hash_bytes(bytes, blen);
+ while (*name)
+ {
+ char ch = *name++;
+
+ /* Merge into hash ... not very bright, but it needn't be */
+ result = pg_rotate_left32(result, 5);
+ result ^= (uint32) ch;
+ }
+ return murmurhash32(result);
}
/*
On Tue, 2023-11-21 at 16:42 +0700, John Naylor wrote:
The strlen call required for hash_bytes() is not free.
Should we have a hash_string() that's like hash_bytes() but checks for
the NUL terminator itself?
That wouldn't be inlinable, but it would save on the strlen() call. It
might benefit some other callers?
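Perhaps something like this one-pass sketch (hash_string_nul is an
invented name; it reuses the rotate/xor loop from this thread plus the
murmur finalizer, so it would not produce hash_bytes()-compatible
values):

#include "common/hashfn.h"
#include "port/pg_bitutils.h"

static inline uint32
hash_string_nul(const char *s)
{
    uint32      h = 0;

    /* walk to the NUL ourselves instead of a separate strlen() pass */
    while (*s)
    {
        h = pg_rotate_left32(h, 5);
        h ^= (uint32) (unsigned char) *s++;
    }
    /* a finalizer supplies the bit mixing the loop itself lacks */
    return murmurhash32(h);
}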
Regards,
Jeff Davis
On Wed, Nov 22, 2023 at 12:00 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Tue, 2023-11-21 at 16:42 +0700, John Naylor wrote:
The strlen call required for hash_bytes() is not free.
Should we have a hash_string() that's like hash_bytes() but checks for
the NUL terminator itself?
That wouldn't be inlinable, but it would save on the strlen() call. It
might benefit some other callers?
We do have string_hash(), which...calls strlen. :-)
Thinking some more, I'm not quite comfortable with the number of
places in these patches that have to know about the pre-downcased
strings, or whether we need that in the first place. If lower case is
common enough to optimize for, it seems the equality function can just
check strict equality on the char and only on mismatch try downcasing
before returning false. Doing our own function would allow the
compiler to inline it, or at least keep it on the same page. Further,
the old hash function shouldn't need to branch to do the same
downcasing, since hashing is lossy anyway. In the keyword hashes, we
just do "*ch |= 0x20", which downcases letters and turns undercores to
DEL. I can take a stab at that later.
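A rough sketch of both halves (invented function names, ASCII names
assumed; pg_rotate_left32() is from port/pg_bitutils.h, murmurhash32()
from common/hashfn.h):

/* strict comparison first; downcase only on a mismatch */
static inline bool
guc_name_eq(const char *namea, const char *nameb)
{
    while (*namea && *nameb)
    {
        char        cha = *namea++;
        char        chb = *nameb++;

        if (cha != chb)
        {
            if (cha >= 'A' && cha <= 'Z')
                cha += 'a' - 'A';
            if (chb >= 'A' && chb <= 'Z')
                chb += 'a' - 'A';
            if (cha != chb)
                return false;
        }
    }
    return *namea == *nameb;    /* true only if both ended together */
}

/* fold unconditionally while hashing: '|= 0x20' downcases letters and
 * maps '_' to DEL, which is fine since hashing is lossy anyway */
static inline uint32
guc_name_hash_folded(const char *name)
{
    uint32      result = 0;

    while (*name)
    {
        char        ch = *name++ | 0x20;

        result = pg_rotate_left32(result, 5);
        result ^= (uint32) ch;
    }
    return murmurhash32(result);
}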
I wrote:
Thinking some more, I'm not quite comfortable with the number of
places in these patches that have to know about the pre-downcased
strings, or whether we need that in the first place. If lower case is
common enough to optimize for, it seems the equality function can just
check strict equality on the char and only on mismatch try downcasing
before returning false. Doing our own function would allow the
compiler to inline it, or at least keep it on the same page. Further,
the old hash function shouldn't need to branch to do the same
downcasing, since hashing is lossy anyway. In the keyword hashes, we
just do "*ch |= 0x20", which downcases letters and turns undercores to
DEL. I can take a stab at that later.
v4 is a quick POC for that. I haven't verified that it's correct for
the case where the probe and the entry don't match, but in case it
doesn't it should be easy to fix. I also didn't bother with
SH_STORE_HASH in my testing.
0001 adds the murmur32 finalizer -- we should do that regardless of
anything else in this thread.
0002 is just Jeff's 0001
0003 adds an equality function that downcases lazily, and teaches the
hash function about the 0x20 trick.
master:
latency average = 581.765 ms
v3 0001-0005:
latency average = 544.576 ms
v4 0001-0003:
latency average = 547.489 ms
This gives similar results with a tiny amount of code (excluding the
simplehash conversion). I didn't check if the compiler inlined these
functions, but we can hint it if necessary. We could use the new
equality function in all the call sites that currently test for
"guc_name_compare() == 0", in which case it might not end up inlined,
but that's probably okay.
We could also try to improve the hash function's collision behavior by
collecting the bytes in a uint64 and calling our new murmur64 before
returning the lower half, but that's speculative.
Attachments:
v4-0002-Convert-GUC-hashtable-to-use-simplehash.patchtext/x-patch; charset=US-ASCII; name=v4-0002-Convert-GUC-hashtable-to-use-simplehash.patchDownload
From 3db510405e03be41dfad34b82069b36586591f42 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 22 Nov 2023 17:35:18 +0700
Subject: [PATCH v4 2/3] Convert GUC hashtable to use simplehash
---
src/backend/utils/misc/guc.c | 141 ++++++++++++++---------------------
1 file changed, 56 insertions(+), 85 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e3834d52ee..bf05b022c3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -203,9 +203,10 @@ typedef struct
{
const char *gucname; /* hash key */
struct config_generic *gucvar; /* -> GUC's defining structure */
-} GUCHashEntry;
-static HTAB *guc_hashtab; /* entries are GUCHashEntrys */
+ /* needed by simplehash */
+ char status;
+} GUCHashEntry;
/*
* In addition to the hash table, variables having certain properties are
@@ -228,8 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int guc_name_match(const void *key1, const void *key2, Size keysize);
+static uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -266,6 +266,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+#define SH_PREFIX GUCHash
+#define SH_ELEMENT_TYPE GUCHashEntry
+#define SH_KEY_TYPE const char *
+#define SH_KEY gucname
+#define SH_HASH_KEY(tb, key) guc_name_hash(key)
+#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL; /* entries are GUCHashEntrys */
/*
* This function handles both actual config file (re)loads and execution of
@@ -283,7 +295,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
ConfigVariable *item,
*head,
*tail;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/* Parse the main config file into a list of option names and values */
@@ -359,8 +371,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* need this so that we can tell below which ones have been removed from
* the file since we last processed it.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
@@ -446,8 +458,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* boot-time defaults. If such a variable can't be changed after startup,
* report that and continue.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
GucStack *stack;
@@ -868,17 +880,17 @@ struct config_generic **
get_guc_variables(int *num_vars)
{
struct config_generic **result;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
int i;
- *num_vars = hash_get_num_entries(guc_hashtab);
+ *num_vars = guc_hashtab->members;
result = palloc(sizeof(struct config_generic *) * *num_vars);
/* Extract pointers from the hash table */
i = 0;
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
result[i++] = hentry->gucvar;
Assert(i == *num_vars);
@@ -900,7 +912,6 @@ build_guc_variables(void)
{
int size_vars;
int num_vars = 0;
- HASHCTL hash_ctl;
GUCHashEntry *hentry;
bool found;
int i;
@@ -962,24 +973,14 @@ build_guc_variables(void)
*/
size_vars = num_vars + num_vars / 4;
- hash_ctl.keysize = sizeof(char *);
- hash_ctl.entrysize = sizeof(GUCHashEntry);
- hash_ctl.hash = guc_name_hash;
- hash_ctl.match = guc_name_match;
- hash_ctl.hcxt = GUCMemoryContext;
- guc_hashtab = hash_create("GUC hash table",
- size_vars,
- &hash_ctl,
- HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+ guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
for (i = 0; ConfigureNamesBool[i].gen.name; i++)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -988,10 +989,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1000,10 +999,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1012,10 +1009,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1024,15 +1019,13 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
- Assert(num_vars == hash_get_num_entries(guc_hashtab));
+ Assert(num_vars == guc_hashtab->members);
}
/*
@@ -1045,10 +1038,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &var->name,
- HASH_ENTER_NULL,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
if (unlikely(hentry == NULL))
{
ereport(elevel,
@@ -1237,10 +1228,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
Assert(name);
/* Look it up using the hash table. */
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry)
return hentry->gucvar;
@@ -1323,10 +1312,9 @@ guc_name_compare(const char *namea, const char *nameb)
* Hash function that's compatible with guc_name_compare
*/
static uint32
-guc_name_hash(const void *key, Size keysize)
+guc_name_hash(const char *name)
{
uint32 result = 0;
- const char *name = *(const char *const *) key;
while (*name)
{
@@ -1343,19 +1331,6 @@ guc_name_hash(const void *key, Size keysize)
return murmurhash32(result);
}
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
- const char *name1 = *(const char *const *) key1;
- const char *name2 = *(const char *const *) key2;
-
- return guc_name_compare(name1, name2);
-}
-
-
/*
* Convert a GUC name to the form that should be used in pg_parameter_acl.
*
@@ -1525,7 +1500,7 @@ check_GUC_init(struct config_generic *gconf)
void
InitializeGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -1543,8 +1518,8 @@ InitializeGUCOptions(void)
* Load all variables with their compiled-in defaults, and initialize
* status fields as needed.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
/* Check mapping between initial and default value */
Assert(check_GUC_init(hentry->gucvar));
@@ -2529,7 +2504,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
void
BeginReportingGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -2553,8 +2528,8 @@ BeginReportingGUCOptions(void)
PGC_INTERNAL, PGC_S_OVERRIDE);
/* Transmit initial values of interesting variables */
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *conf = hentry->gucvar;
@@ -4810,10 +4785,8 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry == NULL)
{
/*
@@ -5149,7 +5122,7 @@ void
MarkGUCPrefixReserved(const char *className)
{
int classLen = strlen(className);
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
MemoryContext oldcontext;
@@ -5159,8 +5132,8 @@ MarkGUCPrefixReserved(const char *className)
* don't bother trying to free associated memory, since this shouldn't
* happen often.)
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *var = hentry->gucvar;
@@ -5175,10 +5148,8 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- hash_search(guc_hashtab,
- &var->name,
- HASH_REMOVE,
- NULL);
+ GUCHash_delete(guc_hashtab, var->name);
+
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
}
@@ -5209,7 +5180,7 @@ get_explain_guc_options(int *num)
* While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
* it doesn't seem worth dynamically resizing this array.
*/
- result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+ result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
/* We need only consider GUCs with source not PGC_S_DEFAULT */
dlist_foreach(iter, &guc_nondef_list)
--
2.42.0
v4-0001-Add-finalizer-to-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v4-0001-Add-finalizer-to-guc_name_hash.patchDownload
From 1a516bb341afb72680470897d75c1d23f75fb37e Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 22 Nov 2023 17:28:41 +0700
Subject: [PATCH v4 1/3] Add finalizer to guc_name_hash
---
src/backend/utils/misc/guc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 82d8efbc96..e3834d52ee 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1339,7 +1340,7 @@ guc_name_hash(const void *key, Size keysize)
result = pg_rotate_left32(result, 5);
result ^= (uint32) ch;
}
- return result;
+ return murmurhash32(result);
}
/*
--
2.42.0
v4-0003-Optimize-GUC-functions-for-simple-hash.patchtext/x-patch; charset=US-ASCII; name=v4-0003-Optimize-GUC-functions-for-simple-hash.patchDownload
From 01b053b473d897c71725a7bb09a3127fc78140dc Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 22 Nov 2023 18:45:48 +0700
Subject: [PATCH v4 3/3] Optimize GUC functions for simple hash
Only downcase a character when an equality check fails,
since we expect most names to be lower case. Also simplify
downcasing in the hash function.
---
src/backend/utils/misc/guc.c | 40 ++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index bf05b022c3..7896deb63b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -229,6 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
+static bool guc_name_eq(const char *namea, const char *nameb);
static uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
@@ -271,7 +272,7 @@ static bool call_enum_check_hook(struct config_enum *conf, int *newval,
#define SH_KEY_TYPE const char *
#define SH_KEY gucname
#define SH_HASH_KEY(tb, key) guc_name_hash(key)
-#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b) (guc_name_eq(a, b))
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
@@ -1308,6 +1309,38 @@ guc_name_compare(const char *namea, const char *nameb)
return 0;
}
+static bool
+guc_name_eq(const char *namea, const char *nameb)
+{
+ char cha;
+ char chb;
+
+ while (*namea && *nameb)
+ {
+ cha = *namea++;
+ chb = *nameb++;
+
+ if (cha != chb)
+ {
+ /* Casefold lazily since we expect lower case */
+ if (cha >= 'A' && cha <= 'Z')
+ cha += 'a' - 'A';
+ if (chb >= 'A' && chb <= 'Z')
+ chb += 'a' - 'A';
+
+ if (cha != chb)
+ return false;
+ }
+ }
+
+ if (*namea == *nameb)
+ return true;
+ else
+ return false;
+
+ //Assert(guc_name_compare(namea, nameb) == 0);
+}
+
/*
* Hash function that's compatible with guc_name_compare
*/
@@ -1320,9 +1353,8 @@ guc_name_hash(const char *name)
{
char ch = *name++;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ /* quick and dirty casefolding suitable for hashing */
+ ch |= 0x20;
/* Merge into hash ... not very bright, but it needn't be */
result = pg_rotate_left32(result, 5);
--
2.42.0
Hi,
On 2023-11-21 16:42:55 +0700, John Naylor wrote:
I get a noticeable regression in 0002, though, and I think I see why:
guc_name_hash(const char *name)
{
-	uint32		result = 0;
+	const unsigned char *bytes = (const unsigned char *)name;
+	int			blen = strlen(name);
The strlen call required for hashbytes() is not free. The lack of
mixing in the (probably inlined after 0001) previous hash function can
be remedied directly, as in the attached:
I doubt this is a good hash function. For short strings, sure, but after
that... I don't think it makes sense to reduce the internal state of a hash
function to something this small.
Greetings,
Andres Freund
Andres Freund <andres@anarazel.de> writes:
On 2023-11-21 16:42:55 +0700, John Naylor wrote:
The strlen call required for hashbytes() is not free. The lack of
mixing in the (probably inlined after 0001) previous hash function can
be remedied directly, as in the attached:
I doubt this is a good hash function. For short strings, sure, but after
that... I don't think it makes sense to reduce the internal state of a hash
function to something this small.
GUC names are just about always short, though, so I'm not sure you've
made your point? At worst, maybe this with 64-bit state instead of 32?
regards, tom lane
Hi,
On 2023-11-22 15:56:21 -0500, Tom Lane wrote:
Andres Freund <andres@anarazel.de> writes:
On 2023-11-21 16:42:55 +0700, John Naylor wrote:
The strlen call required for hashbytes() is not free. The lack of
mixing in the (probably inlined after 0001) previous hash function can
be remedied directly, as in the attached:
I doubt this is a good hash function. For short strings, sure, but after
that... I don't think it makes sense to reduce the internal state of a hash
function to something this small.
GUC names are just about always short, though, so I'm not sure you've
made your point?
With short I meant <= 6 characters (32 / 5 = 6.x). After that you're
overwriting bits that you previously set, without dispersing the "overwritten"
bits throughout the hash state.
It's pretty easy to create conflicts this way, even just on paper. E.g. I
think abcdefgg and cbcdefgw would have the same hash, because the accumulated
value passed to murmurhash32 is the same.
The fact that this happens when a large part of the string is the same
is bad, because it makes it more likely that prefixed strings trigger such
conflicts, and they're obviously common with GUC strings.
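A minimal standalone check of that collision claim (rotl32 here stands
in for pg_rotate_left32, and equal accumulated values stay equal through
any finalizer):

#include <stdint.h>
#include <stdio.h>

static uint32_t rotl32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

/* accumulate exactly as the pre-finalizer guc_name_hash loop does */
static uint32_t accum(const char *name)
{
	uint32_t	result = 0;

	while (*name)
	{
		result = rotl32(result, 5);
		result ^= (uint32_t) *name++;
	}
	return result;
}

int main(void)
{
	/* prints the same value twice, so the finalized hashes collide too */
	printf("%08x %08x\n", (unsigned) accum("abcdefgg"),
		   (unsigned) accum("cbcdefgw"));
	return 0;
}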
Greetings,
Andres Freund
Andres Freund <andres@anarazel.de> writes:
On 2023-11-22 15:56:21 -0500, Tom Lane wrote:
GUC names are just about always short, though, so I'm not sure you've
made your point?
With short I meant <= 6 characters (32 / 5 = 6.x). After that you're
overwriting bits that you previously set, without dispersing the "overwritten"
bits throughout the hash state.
I'm less than convinced about the "overwrite" part:
+ /* Merge into hash ... not very bright, but it needn't be */
+ result = pg_rotate_left32(result, 5);
+ result ^= (uint32) ch;
Rotating a 32-bit value 5 bits at a time doesn't result in successive
characters lining up exactly, and even once they do, XOR is not
"overwrite". I'm pretty dubious that we need something better than this.
regards, tom lane
Hi,
On 2023-11-22 16:27:56 -0500, Tom Lane wrote:
Andres Freund <andres@anarazel.de> writes:
On 2023-11-22 15:56:21 -0500, Tom Lane wrote:
GUC names are just about always short, though, so I'm not sure you've
made your point?
With short I meant <= 6 characters (32 / 5 = 6.x). After that you're
overwriting bits that you previously set, without dispersing the "overwritten"
bits throughout the hash state.
I'm less than convinced about the "overwrite" part:
+ /* Merge into hash ... not very bright, but it needn't be */
+ result = pg_rotate_left32(result, 5);
+ result ^= (uint32) ch;
Rotating a 32-bit value 5 bits at a time doesn't result in successive
characters lining up exactly, and even once they do, XOR is not
"overwrite".
I didn't know what word to use, hence the air quotes. Yes, xor doesn't
just set in the bits from the right hand side, but it still only affects
data on a per-bit basis, which easily can be cancelled out.
My understanding of writing hash functions is that every added bit mixed in
should have a ~50% chance of causing each other bit to flip. The proposed
function obviously doesn't get there.
It's worth noting that the limited range of the input values means that
there's a lot of bias toward some bits being set ('a' to 'z' all start with
0b011).
I'm pretty dubious that we need something better than this.
Well, we know that the current attempt at a dedicated hash function for this
does result in substantial amounts of conflicts. And it's hard to understand
such cases when you hit them, so I think it's better to avoid exposing
ourselves to such dangers, without a distinct need.
And I don't really see the need here to risk it, even if we are somewhat
confident it's fine.
If, which I mildly doubt, we can't afford to call murmurhash32 for every
character, we could just call it for 32/5 input characters together. Or we
could just load up to 8 characters into a 64-bit integer, and call
murmurhash64.
Something roughly like
uint64 result = 0;
while (*name)
{
uint64 value = 0;
for (int i = 0; i < 8 && *name; i++)
{
char ch = *name++;
/* shift first, then merge in ch, so no byte is dropped */
value = (value << 8) | (unsigned char) ch;
}
result = hash_combine64(result, murmurhash64(value));
}
The hash_combine use isn't quite right either, we should use the full
accumulator state of a proper hash function, but it seems very unlikely to
matter here.
The fact that string_hash() is slow due to the strlen(), which causes us to
process the input twice and which is optimized to also handle very long
strings which typically string_hash() doesn't encounter, seems problematic far
beyond this case. We use string_hash() in a *lot* of places, and that strlen()
does regularly show up in profiles. We should fix that.
The various hash functions being external functions also show up in a bunch
of profiles too. It's particularly ridiculous for cases like tag_hash(),
where the caller typically knows the length, but calls a routine in a
different translation unit, which obviously can't be optimized for a specific
length.
I think we ought to adjust our APIs around this:
1) The accumulator state of the hash functions should be exposed, so one can
accumulate values into the hash state, without reducing the internal state
to a single 32/64 bit variable.
2) For callers that know the length of data, we should use a static inline
hash function, rather than an external function call. This should include
special cased inline functions for adding 32/64bit of data to the hash
state.
Perhaps with a bit of logic to *not* use the inline version if the hashed
input is long (and thus the call overhead doesn't matter). Something like
if (__builtin_constant_p(len) && len < 128)
/* call inline implementation */
else
/* call out of line implementation, not worth the code bloat */
We know that hash functions should have the split into init/process
data/finish steps, as e.g. evidenced by pg_crc.h/pg_crc32.h.
With something like that, you could write a function that lowercases
characters inline without incurring unnecessary overhead.
hash32_state hs;
hash32_init(&hs);
while (*name)
{
char ch = *name++;
/* crappy lowercase for this situation */
ch |= 0x20;
hash32_process_byte(&hs, ch);
}
return hash32_finish(&hs);
Perhaps with some additional optimization for processing the input string in
32/64 bit quantities.
Greetings,
Andres Freund
On Thu, Nov 23, 2023 at 5:34 AM Andres Freund <andres@anarazel.de> wrote:
It's worth noting that the limited range of the input values means that
there's a lot of bias toward some bits being set ('a' to 'z' all start with
0b011).
We can take advantage of the limited range with a single additional
instruction: After "ch |= 0x20", do "ch -= ('a' - 1)". That'll shrink
letters and underscores to the range [1,31], which fits in 5 bits.
(Other characters are much less common in a guc name). That increases
randomness and allows 12 chars to be xor'd in before the first bits
rotate around.
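A quick sanity check of those ranges (standalone, not from the patches):

#include <stdio.h>

int main(void)
{
	/* after ch |= 0x20, letters are 'a'..'z' and '_' becomes 0x7F;
	 * subtracting ('a' - 1) maps all of them into [1, 31], i.e. 5 bits */
	printf("a -> %d\n", ('a' | 0x20) - ('a' - 1));	/* 1 */
	printf("z -> %d\n", ('z' | 0x20) - ('a' - 1));	/* 26 */
	printf("_ -> %d\n", ('_' | 0x20) - ('a' - 1));	/* 31 */
	return 0;
}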
If, which I mildly doubt, we can't afford to call murmurhash32 for every
character, we could just call it for 32/5 input characters together. Or we
could just load up to 8 characters into a 64-bit integer, and call
murmurhash64.
I'll play around with this idea, as well.
The fact that string_hash() is slow due to the strlen(), which causes us to
process the input twice and which is optimized to also handle very long
strings which typically string_hash() doesn't encounter, seems problematic far
beyond this case. We use string_hash() in a *lot* of places, and that strlen()
does regularly show up in profiles. We should fix that.
+1
I think we ought to adjust our APIs around this:
1) The accumulator state of the hash functions should be exposed, so one can
accumulate values into the hash state, without reducing the internal state
to a single 32/64 bit variable.
If so, it might make sense to vendor a small, suitably licensed hash
function that already has these APIs.
While on the subject, it'd be good to have a clear separation between
in-memory and on-disk usage, so we can make breaking changes in the
former.
Attached is a rough start with Andres's earlier ideas, to get
something concrete out there.
I took a look around at other implementations a bit. Many modern hash
functions use MUM-style hashing, which typically uses 128-bit
arithmetic. Even if they already have an incremental interface and
have a compatible license, it seems a bit too much work to adopt just
for a couple string use cases. Might be useful elsewhere, though, but
that's off topic.
However, I did find a couple hash functions that are much simpler to
adapt to a bytewise interface, pass SMHasher, and are decently fast on
short inputs:
- fast-hash, MIT licensed, and apparently has some use in software [1]
- MX3, CC0 license (looking around, seems controversial for a code
license, so didn't go further). [2] Seems to be a for-fun project, but
the accompanying articles are very informative on how to develop these
things.
After whacking fast-hash around, it doesn't really resemble the
original much, and if for some reason we went as far as switching out
the mixing/final functions, it may as well be called completely
original work. I thought it best to start with something whose mixing
behavior passes SMHasher, and hopefully preserve that property.
Note that the combining and final steps share most of their arithmetic
operations. This may have been done on purpose to minimize binary
size, but I didn't check. Also, it incorporates input length into the
calculation. Since we don't know the length of C strings up front, I
threw that out for now. It'd be possible to track the length as we go
and incorporate something into the final step. The hard part is
verifying it hasn't lost any quality.
v5-0001 puts fast-hash as-is into a new header, named in a way to
convey in-memory use e.g. hash tables.
v5-0002 does the minimum needed to allow dynahash to use this for string_hash,
inlined but still calling strlen.
v5-0003 shows one way to do an incremental interface. It might be okay
for simplehash with fixed length keys, but seems awkward for strings.
v5-0004 shows a bytewise incremental interface, with implementations
for dynahash (getting rid of strlen) and guc hash.
[1]: https://code.google.com/archive/p/fast-hash/
[2]: https://github.com/jonmaiga/mx3
Attachments:
v5-0004-Add-bytewise-interface-for-incrementing-the-hash-.patchtext/x-patch; charset=US-ASCII; name=v5-0004-Add-bytewise-interface-for-incrementing-the-hash-.patchDownload
From c2b799dd2418fb68fcfc6ccf006a50f74c9072fe Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 29 Nov 2023 16:28:58 +0700
Subject: [PATCH v5 4/4] Add bytewise interface for incrementing the hash state
This is the nicest interface for guc_name_hash, and seems
good for string_hash too. It's not clear if this is good for
the latter from a performance perspective.
---
src/backend/utils/hash/dynahash.c | 14 +++--------
src/backend/utils/misc/guc.c | 16 ++++++-------
src/include/common/hashfn_unstable.h | 35 ++++++++++++++++++++++++++++
3 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 1c08dc8942..ab2dbefd12 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -328,19 +328,11 @@ string_hash(const void *key, Size keysize)
fasthash64_init(&hs, 0);
- while (*buf)
+ while (*buf && s_len < keysize)
{
- int chunk_len = 0;
+ s_len++;
- for (int i = 0;
- i < 8 && *buf++ && s_len < keysize;
- i++)
- {
- chunk_len++;
- s_len++;
- }
-
- fasthash64_accum(&hs, buf, chunk_len);
+ fasthash64_accum_byte(&hs, *buf);
}
return fasthash64_final32(&hs);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 82d8efbc96..2428f2475c 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn_unstable.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1324,22 +1325,21 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const void *key, Size keysize)
{
- uint32 result = 0;
const char *name = *(const char *const *) key;
+ fasthash64_state hs;
+
+ fasthash64_init(&hs, 0);
while (*name)
{
char ch = *name++;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ /* quick and dirty case-folding suitable for hashing */
+ ch |= 0x20;
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
+ fasthash64_accum_byte(&hs, ch);
}
- return result;
+ return fasthash64_final32(&hs);
}
/*
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a95942f7af..1b7db5ac07 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -26,6 +26,7 @@
typedef struct fasthash64_state
{
uint64 accum;
+ int8 accum_len;
uint64 hash;
} fasthash64_state;
@@ -46,6 +47,31 @@ void fasthash64_init(fasthash64_state *hs, uint64_t seed)
// setting seed would go here
}
+static inline
+void fasthash64_accum_byte(fasthash64_state *hs, const unsigned char ch)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+
+ hs->accum |= ch;
+
+ // wip: is there a better way to get sizeof struct member?
+ if (hs->accum_len == sizeof(((fasthash64_state *) 0)->accum))
+ {
+ // combine into hash
+ hs->hash ^= mix(hs->accum);
+ hs->hash *= m;
+
+ // reset accum
+ hs->accum = 0;
+ hs->accum_len = 0;
+ }
+ else
+ {
+ hs->accum <<= sizeof(unsigned char);
+ hs->accum_len += sizeof(unsigned char);
+ }
+}
+
static inline
void fasthash64_accum(fasthash64_state *hs, const void *buf, int len)
{
@@ -94,6 +120,15 @@ void fasthash64_accum(fasthash64_state *hs, const void *buf, int len)
static inline
uint64_t fasthash64_final(fasthash64_state *hs)
{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+
+ // check for remaining bytes to combine into hash
+ if (hs->accum_len > 0)
+ {
+ hs->hash ^= mix(hs->accum);
+ hs->hash *= m;
+ }
+
return mix(hs->hash);
}
--
2.42.0
v5-0003-Add-incremental-interface-to-fasthash-and-use-it-.patchtext/x-patch; charset=US-ASCII; name=v5-0003-Add-incremental-interface-to-fasthash-and-use-it-.patchDownload
From fcabd2c486b46c0d99ab7e17739ce664e1c5860f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 28 Nov 2023 19:22:54 +0700
Subject: [PATCH v5 3/4] Add incremental interface to fasthash and use it in
string_hash
---
src/backend/utils/hash/dynahash.c | 25 ++++++++++++++---
src/include/common/hashfn_unstable.h | 40 +++++++++++++++++++++-------
2 files changed, 53 insertions(+), 12 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 6ca1442647..1c08dc8942 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -316,15 +316,34 @@ string_compare(const char *key1, const char *key2, Size keysize)
static inline uint32
string_hash(const void *key, Size keysize)
{
+ fasthash64_state hs;
+ int s_len = 0;
+ const char *buf = (const char *) key;
+
/*
* If the string exceeds keysize-1 bytes, we want to hash only that many,
* because when it is copied into the hash table it will be truncated at
* that length.
*/
- size_t s_len = strlen((const char *) key);
- s_len = Min(s_len, keysize - 1);
- return fasthash32(key, s_len, 0);
+ fasthash64_init(&hs, 0);
+
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ for (int i = 0;
+ i < 8 && *buf++ && s_len < keysize;
+ i++)
+ {
+ chunk_len++;
+ s_len++;
+ }
+
+ fasthash64_accum(&hs, buf, chunk_len);
+ }
+
+ return fasthash64_final32(&hs);
}
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 04a24934bc..a95942f7af 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -23,6 +23,11 @@
SOFTWARE.
*/
+typedef struct fasthash64_state
+{
+ uint64 accum;
+ uint64 hash;
+} fasthash64_state;
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
@@ -33,19 +38,31 @@ static inline uint64_t mix(uint64_t h) {
return h;
}
-// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix.
-// objsize: 0-1fd: 509
static inline
-uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+void fasthash64_init(fasthash64_state *hs, uint64_t seed)
+{
+ memset(hs, 0, sizeof(fasthash64_state));
+
+ // setting seed would go here
+}
+
+static inline
+void fasthash64_accum(fasthash64_state *hs, const void *buf, int len)
{
const uint64_t m = 0x880355f21e6d1965ULL;
const uint64_t *pos = (const uint64_t *)buf;
const uint64_t *end = pos + (len / 8);
const unsigned char *pos2;
- uint64_t h = seed ^ (len * m);
- uint64_t v;
+
+ // since we don't know the length for a nul-terminated string
+ // handle some other way -- maybe we can accum the length in
+ // the state and fold it in during the finalizer (cf. xxHash3)
+ //uint64_t h = seed ^ (len * m);
+ uint64_t v = hs->accum;
+ uint64 h = hs->hash;
while (pos != end) {
+ // wip: use memcpy for alignment-picky platforms
v = *pos++;
h ^= mix(v);
h *= m;
@@ -71,17 +88,22 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
h ^= mix(v);
h *= m;
}
-
- return mix(h);
}
+
+static inline
+uint64_t fasthash64_final(fasthash64_state *hs)
+{
+ return mix(hs->hash);
+}
+
// objsize: 0-236: 566
static inline
-uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+uint32_t fasthash64_final32(fasthash64_state *hs)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64_t h = fasthash64(buf, len, seed);
+ uint64_t h = fasthash64_final(hs);
return h - (h >> 32);
}
--
2.42.0
v5-0001-Vendor-fasthash.patchtext/x-patch; charset=US-ASCII; name=v5-0001-Vendor-fasthash.patchDownload
From bcfa6ba22de8c85b3448de6aec14edeb27003bc8 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v5 1/4] Vendor fasthash
MIT licensed
Using copy found at
https://github.com/rurban/smhasher/commit/375a0b20272ca928830c1f4c890d34b3919cbcb3
---
src/include/common/hashfn_unstable.h | 80 ++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..76ed27c0a0
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,80 @@
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#include "fasthash.h"
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+static inline uint64_t mix(uint64_t h) {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix.
+// objsize: 0-1fd: 509
+uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t *pos = (const uint64_t *)buf;
+ const uint64_t *end = pos + (len / 8);
+ const unsigned char *pos2;
+ uint64_t h = seed ^ (len * m);
+ uint64_t v;
+
+ while (pos != end) {
+ v = *pos++;
+ h ^= mix(v);
+ h *= m;
+ }
+
+ pos2 = (const unsigned char*)pos;
+ v = 0;
+
+ switch (len & 7) {
+ case 7: v ^= (uint64_t)pos2[6] << 48;
+ case 6: v ^= (uint64_t)pos2[5] << 40;
+ case 5: v ^= (uint64_t)pos2[4] << 32;
+ case 4: v ^= (uint64_t)pos2[3] << 24;
+ case 3: v ^= (uint64_t)pos2[2] << 16;
+ case 2: v ^= (uint64_t)pos2[1] << 8;
+ case 1: v ^= (uint64_t)pos2[0];
+ h ^= mix(v);
+ h *= m;
+ }
+
+ return mix(h);
+}
+
+// objsize: 0-236: 566
+uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64_t h = fasthash64(buf, len, seed);
+ return h - (h >> 32);
+}
--
2.42.0
v5-0002-Inline-string_hash-and-call-out-to-fasthash-inste.patchtext/x-patch; charset=US-ASCII; name=v5-0002-Inline-string_hash-and-call-out-to-fasthash-inste.patchDownload
From 0b29afbee9cf795136b722841c8fc4ee4667ee0d Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 28 Nov 2023 18:09:00 +0700
Subject: [PATCH v5 2/4] Inline string_hash and call out to fasthash instead of
hash_bytes
---
src/backend/utils/hash/dynahash.c | 20 ++++++++++++++++++++
src/common/hashfn.c | 19 -------------------
src/include/common/hashfn.h | 1 -
src/include/common/hashfn_unstable.h | 9 ++++++++-
4 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 012d4a0b1f..6ca1442647 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -307,6 +308,25 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * string_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static inline uint32
+string_hash(const void *key, Size keysize)
+{
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+ size_t s_len = strlen((const char *) key);
+
+ s_len = Min(s_len, keysize - 1);
+ return fasthash32(key, s_len, 0);
+}
+
/************************** CREATE ROUTINES **********************/
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 2490607eea..65e4dd07ba 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -651,25 +651,6 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed)
return ((uint64) b << 32) | c;
}
-/*
- * string_hash: hash function for keys that are NUL-terminated strings.
- *
- * NOTE: this is the default hash function if none is specified.
- */
-uint32
-string_hash(const void *key, Size keysize)
-{
- /*
- * If the string exceeds keysize-1 bytes, we want to hash only that many,
- * because when it is copied into the hash table it will be truncated at
- * that length.
- */
- Size s_len = strlen((const char *) key);
-
- s_len = Min(s_len, keysize - 1);
- return hash_bytes((const unsigned char *) key, (int) s_len);
-}
-
/*
* tag_hash: hash function for fixed-size tag values
*/
diff --git a/src/include/common/hashfn.h b/src/include/common/hashfn.h
index adc1dc1de8..54ab616ba4 100644
--- a/src/include/common/hashfn.h
+++ b/src/include/common/hashfn.h
@@ -52,7 +52,6 @@ hash_uint32_extended(uint32 k, uint64 seed)
}
#endif
-extern uint32 string_hash(const void *key, Size keysize);
extern uint32 tag_hash(const void *key, Size keysize);
extern uint32 uint32_hash(const void *key, Size keysize);
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 76ed27c0a0..04a24934bc 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -23,7 +23,6 @@
SOFTWARE.
*/
-#include "fasthash.h"
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
@@ -36,6 +35,7 @@ static inline uint64_t mix(uint64_t h) {
// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix.
// objsize: 0-1fd: 509
+static inline
uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
{
const uint64_t m = 0x880355f21e6d1965ULL;
@@ -56,11 +56,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
switch (len & 7) {
case 7: v ^= (uint64_t)pos2[6] << 48;
+ /* FALLTHROUGH */
case 6: v ^= (uint64_t)pos2[5] << 40;
+ /* FALLTHROUGH */
case 5: v ^= (uint64_t)pos2[4] << 32;
+ /* FALLTHROUGH */
case 4: v ^= (uint64_t)pos2[3] << 24;
+ /* FALLTHROUGH */
case 3: v ^= (uint64_t)pos2[2] << 16;
+ /* FALLTHROUGH */
case 2: v ^= (uint64_t)pos2[1] << 8;
+ /* FALLTHROUGH */
case 1: v ^= (uint64_t)pos2[0];
h ^= mix(v);
h *= m;
@@ -70,6 +76,7 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
}
// objsize: 0-236: 566
+static inline
uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
{
// the following trick converts the 64-bit hashcode to Fermat
--
2.42.0
On 29/11/2023 15:31, John Naylor wrote:
However, I did find a couple hash functions that are much simpler to
adapt to a bytewise interface, pass SMHasher, and are decently fast on
short inputs:
- fast-hash, MIT licensed, and apparently has some use in software [1]
- MX3, CC0 license (looking around, seems controversial for a code
license, so didn't go further). [2] Seems to be a for-fun project, but
the accompanying articles are very informative on how to develop these
things.
After whacking fast-hash around, it doesn't really resemble the
original much, and if for some reason we went as far as switching out
the mixing/final functions, it may as well be called completely
original work. I thought it best to start with something whose mixing
behavior passes SMHasher, and hopefully preserve that property.
I didn't understand what you meant by the above. Did you wack around
fast-hash, or who did? Who switched mixing/final functions; compared to
what? The version you have in the patch matches the implementation in
smhasher, did you mean that the smhasher author changed it compared to
the original?
In any case, +1 on the implementation you had in the patch at a quick
glance.
Let's also replace the partial murmurhash implementations we have in
hashfn.h with this. It's a very similar algorithm, and we don't need two.
--
Heikki Linnakangas
Neon (https://neon.tech)
On Wed, Nov 29, 2023 at 9:59 PM Heikki Linnakangas <hlinnaka@iki.fi> wrote:
I didn't understand what you meant by the above. Did you whack around
fast-hash, or who did?
I turned it into an init/accum/final style (shouldn't affect the
result), and took out the input length from the calculation (will
affect the result and I'll look into putting it back some other way).
Who switched mixing/final functions; compared to
what?
Sorry for the confusion. I didn't change those, I was speaking hypothetically.
In any case, +1 on the implementation you had in the patch at a quick
glance.Let's also replace the partial murmurhash implementations we have in
hashfn.h with this. It's a very similar algorithm, and we don't need two.
Thanks for taking a look! For small fixed-sized values, it's common to
special-case a murmur-style finalizer regardless of the algorithm for
longer inputs. Syscache combines multiple hashes for multiple keys, so
it's probably worth it to avoid adding cycles there.
On Wed, Nov 29, 2023 at 8:31 PM John Naylor <johncnaylorls@gmail.com> wrote:
Attached is a rough start with Andres's earlier ideas, to get
something concrete out there.
While looking at the assembly out of curiosity, I found a couple bugs
in the split API that I've fixed locally.
I think the path forward is:
- performance measurements with both byte-at-a-time and
word-at-a-time, once I make sure they're fixed
- based on the above decide which one is best for guc_name_hash
- clean up hash function implementation
- test with with a new guc_name_compare (using what we learned from my
guc_name_eq) and see how well we do with keeping dynahash vs.
simplehash
Separately, for string_hash:
- run SMHasher and see about reincorporating length in the
calculation. v5 should be a clear improvement in collision behavior
over the current guc_name_hash, but we need to make sure it's at least
as good as hash_bytes, and ideally not lose anything compared to
standard fast_hash.
On Wed, 2023-11-29 at 20:31 +0700, John Naylor wrote:
v5-0001 puts fast-hash as-is into a new header, named in a way to
convey in-memory use e.g. hash tables.
v5-0002 does the minimum needed to allow dynahash to use this for string_hash,
inlined but still calling strlen.
v5-0003 shows one way to do an incremental interface. It might be okay
for simplehash with fixed length keys, but seems awkward for strings.
v5-0004 shows a bytewise incremental interface, with implementations
for dynahash (getting rid of strlen) and guc hash.
I'm trying to follow the distinctions you're making between dynahash
and simplehash -- are you saying it's easier to do incremental hashing
with dynahash, and if so, why?
If I understood what Andres was saying, the exposed hash state would be
useful for writing a hash function like guc_name_hash(). But whether we
use simplehash or dynahash is a separate question, right?
Also, while the |= 0x20 is a nice trick for lowercasing, did we decide
that it's better than my approach in patch 0004 here:
/messages/by-id/27a7a289d5b8f42e1b1e79b1bcaeef3a40583bd2.camel@j-davis.com
which optimizes exact hits (most GUC names are already folded) before
trying case folding?
Regards,
Jeff Davis
On Mon, Dec 4, 2023 at 4:16 AM Jeff Davis <pgsql@j-davis.com> wrote:
I'm trying to follow the distinctions you're making between dynahash
and simplehash -- are you saying it's easier to do incremental hashing
with dynahash, and if so, why?
That's a good thing to clear up. This thread has taken simplehash as a
starting point from the very beginning. It initially showed no
improvement, and then we identified problems with the hashing and
equality computations. The latter seem like independently committable
improvements, so I'm curious if they help on their own, even if we
still need to switch to simplehash as a last step to meet your
performance goals.
If I understood what Andres was saying, the exposed hash state would be
useful for writing a hash function like guc_name_hash().
From my point of view, it would at least be useful for C-strings,
where we don't have the length available up front.
Aside from that, we have multiple places that compute full 32-bit
hashes on multiple individual values, and then combine them with
various ad-hoc ways. It could be worth exploring whether an
incremental interface would be better in those places on a
case-by-case basis.
(If Andres had something else in mind, I'll let him address that.)
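To make that concrete, a hedged sketch contrasting the two styles for a
two-field key; hash_combine() and murmurhash32() exist in common/hashfn.h
today, while the hash32_* names are hypothetical, borrowed from Andres's
sketch upthread:

/* today: hash each field fully, then merge the finished hashes */
static uint32
key_hash_combined(uint32 a, uint32 b)
{
	return hash_combine(murmurhash32(a), murmurhash32(b));
}

/* with an exposed accumulator state (hypothetical API): feed both
 * fields into one state and pay for a single finalization */
static uint32
key_hash_incremental(uint32 a, uint32 b)
{
	hash32_state hs;

	hash32_init(&hs);
	hash32_process_uint32(&hs, a);
	hash32_process_uint32(&hs, b);
	return hash32_finish(&hs);
}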
But whether we
use simplehash or dynahash is a separate question, right?
Right, the table implementation should treat the hash function as a
black box. Think of the incremental API as lower-level building blocks
for building hash functions.
Also, while the |= 0x20 is a nice trick for lowercasing, did we decide
that it's better than my approach in patch 0004 here:
/messages/by-id/27a7a289d5b8f42e1b1e79b1bcaeef3a40583bd2.camel@j-davis.com
which optimizes exact hits (most GUC names are already folded) before
trying case folding?
Note there were two aspects there: hashing and equality. I demonstrated in
/messages/by-id/CANWCAZbQ30O9j-bEZ_1zVCyKPpSjwbE4u19cSDDBJ=TYrHvPig@mail.gmail.com
... in v4-0003 that the equality function can be optimized for
already-folded names (and in fact measured almost equally) using way,
way, way less code.
On Mon, 2023-12-04 at 12:12 +0700, John Naylor wrote:
That's a good thing to clear up. This thread has taken simplehash as a
starting point from the very beginning. It initially showed no
improvement, and then we identified problems with the hashing and
equality computations. The latter seem like independently committable
improvements, so I'm curious if they help on their own, even if we
still need to switch to simplehash as a last step to meet your
performance goals.
There's already a patch to use simplehash, and the API is a bit
cleaner, and there's a minor performance improvement. It seems fairly
non-controversial -- should I just proceed with that patch?
If I understood what Andres was saying, the exposed hash state would be
useful for writing a hash function like guc_name_hash().
From my point of view, it would at least be useful for C-strings,
where we don't have the length available up front.
That's good news.
By the way, is there any reason that we would need hash_bytes(s,
strlen(s)) == cstring_hash(s)?
Also, while the |= 0x20 is a nice trick for lowercasing, did we decide
that it's better than my approach in patch 0004 here:
/messages/by-id/27a7a289d5b8f42e1b1e79b1bcaeef3a40583bd2.camel@j-davis.com
which optimizes exact hits (most GUC names are already folded) before
trying case folding?
Note there were two aspects there: hashing and equality. I
demonstrated in
/messages/by-id/CANWCAZbQ30O9j-bEZ_1zVCyKPpSjwbE4u19cSDDBJ=TYrHvPig@mail.gmail.com
... in v4-0003 that the equality function can be optimized for
already-folded names (and in fact measured almost equally) using way,
way, way less code.
Thinking in terms of API layers, there are two approaches: (a) make the
hash and equality functions aware of the case-insensitivity, as we
currently do; or (b) make it the caller's responsibility to do case
folding, and the hash and equality functions are based on exact
equality.
Each approach has its own optimization techniques. In (a), we can use
the |= 0x20 trick, and for equality do a memcmp() check first. In (b),
the caller can first try lookup of the key in whatever form is
provided, and only if that fails, case-fold it and try again.
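A sketch of (b)'s lookup strategy, with hypothetical helpers (find_exact()
and downcase_name() are illustrative names, not existing functions):

/* approach (b): hash/equality are exact; the caller retries with a
 * case-folded copy only when the exact lookup misses */
static struct config_generic *
lookup_guc_sketch(const char *name)
{
	struct config_generic *var = find_exact(name);

	if (var == NULL)
	{
		char	   *folded = downcase_name(name);	/* palloc'd downcased copy */

		var = find_exact(folded);
		pfree(folded);
	}
	return var;
}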
As a tangential point, we may eventually want to provide a more
internationalized definition of "case insensitive" for GUC names. That
would be slightly easier with (b) than with (a), but we can cross that
bridge if and when we come to it.
It seems you are moving toward (a) whereas my patches moved toward (b).
I am fine with either approach but I wanted to clarify which approach
we are using.
In the abstract, I kind of like approach (b) because we don't need to
be as special/clever with the hash functions. We would still want the
faster hash for C-strings, but that's general and helps all callers.
But you're right that it's more code, and that's not great.
Regards,
Jeff Davis
On Tue, Dec 5, 2023 at 1:57 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Mon, 2023-12-04 at 12:12 +0700, John Naylor wrote:
There's already a patch to use simplehash, and the API is a bit
cleaner, and there's a minor performance improvement. It seems fairly
non-controversial -- should I just proceed with that patch?
I won't object if you want to commit that piece now, but I hesitate to
call it a performance improvement on its own.
- The runtime measurements I saw reported were well within the noise level.
- The memory usage starts out better, but with more entries is worse.
From my point of view, it would at least be useful for C-strings,
where we don't have the length available up front.
That's good news.
By the way, is there any reason that we would need hash_bytes(s,
strlen(s)) == cstring_hash(s)?
"git grep cstring_hash" found nothing, so not sure what you're asking.
Each approach has its own optimization techniques. In (a), we can use
the |= 0x20 trick, and for equality do a memcmp() check first.
I will assume you are referring to semantics, but on the odd chance
readers take this to mean the actual C library call, that wouldn't be
an optimization, that'd be a pessimization.
As a tangential point, we may eventually want to provide a more
internationalized definition of "case insensitive" for GUC names. That
would be slightly easier with (b) than with (a), but we can cross that
bridge if and when we come to it.
The risk/reward ratio seems pretty bad.
It seems you are moving toward (a) whereas my patches moved toward (b).
I am fine with either approach but I wanted to clarify which approach
we are using.
I will make my case:
In the abstract, I kind of like approach (b) because we don't need to
be as special/clever with the hash functions.
In the abstract, I consider (b) to be a layering violation. As a
consequence, the cleverness in (b) is not confined to one or two
places, but is smeared over a whole bunch of places. I find it hard to
follow.
Concretely, it also adds another pointer to the element struct. That's
not good for a linear open-addressing array, which simplehash has.
Further, remember the equality function is important as well. In v3,
it was "strcmp(a,b)==0", which is a holdover from the dynahash API.
One of the advantages of the simplehash API is that we can 1) use an
equality function, which should be slightly cheaper than a full
comparison function, and 2) we have the option to inline it. (It
doesn't make sense, in turn, to jump to a shared lib page and invoke an
indirect function call.) Once we've done that, it's already "special",
so it's not a stretch to make it do what we want to begin with. If a
nicer API is important, why not use it?
On Wed, 2023-12-06 at 07:39 +0700, John Naylor wrote:
"git grep cstring_hash" found nothing, so not sure what you're
asking.
Sorry, I meant string_hash(). Your v5-0002 changes the way hashing
works for cstrings, and that means it's no longer equivalent to
hash_bytes with strlen. That's probably fine, but someone might assume
that they are equivalent.
In the abstract, I consider (b) to be a layering violation. As a
consequence, the cleverness in (b) is not confined to one or two
places, but is smeared over a whole bunch of places. I find it hard to
follow.
OK. I am fine with (a).
Regards,
Jeff Davis
On Wed, Dec 6, 2023 at 11:48 PM Jeff Davis <pgsql@j-davis.com> wrote:
On Wed, 2023-12-06 at 07:39 +0700, John Naylor wrote:
"git grep cstring_hash" found nothing, so not sure what you're
asking.
Sorry, I meant string_hash(). Your v5-0002 changes the way hashing
works for cstrings, and that means it's no longer equivalent to
hash_bytes with strlen. That's probably fine, but someone might assume
that they are equivalent.
That's a good point. It might be best to leave string_hash where it is
and remove the comment that it's the default. Then the new function (I
like the name cstring_hash) can live in dynahash.c where it's obvious
what "default" means.
On Wed, 2023-11-29 at 20:31 +0700, John Naylor wrote:
Attached is a rough start with Andres's earlier ideas, to get
something concrete out there.
The implementation of string hash in 0004 forgot to increment 'buf'.
I tested using the new hash function APIs for my search path cache, and
there's a significant speedup for cases not benefiting from a86c61c9ee.
It's enough that we almost don't need a86c61c9ee. So a definite +1 to
the new APIs.
Regards,
Jeff Davis
I committed 867dd2dc87, which means my use case for a fast GUC hash
table (quickly setting proconfigs) is now solved.
Andres mentioned that it could still be useful to reduce overhead in a
few other places:
/messages/by-id/20231117220830.t6sb7di6h6am4ep5@awork3.anarazel.de
How should we evaluate GUC hash table performance optimizations? Just
microbenchmarks, or are there end-to-end tests where the costs are
showing up?
(As I said in another email, I think the hash function APIs justify
themselves regardless of improvements to the GUC hash table.)
On Wed, 2023-12-06 at 07:39 +0700, John Naylor wrote:
There's already a patch to use simplehash, and the API is a bit
cleaner, and there's a minor performance improvement. It seems fairly
non-controversial -- should I just proceed with that patch?
I won't object if you want to commit that piece now, but I hesitate to
call it a performance improvement on its own.
- The runtime measurements I saw reported were well within the noise level.
- The memory usage starts out better, but with more entries is worse.
I suppose I'll wait until there's a reason to commit it, then.
Regards,
Jeff Davis
On Sat, Dec 9, 2023 at 3:32 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Wed, 2023-11-29 at 20:31 +0700, John Naylor wrote:
Attached is a rough start with Andres's earlier ideas, to get
something concrete out there.
The implementation of string hash in 0004 forgot to increment 'buf'.
Yeah, that was one of the bugs I mentioned. In v6, I fixed it so we
get the right answer.
0001 pure copy of fasthash upstream
0002 keeps the originals for validation, and then re-implements them
using the new incremental interfaces
0003 adds UINT64CONST. After writing this I saw that murmur64 didn't
have UINT64CONST (and obviously no buildfarm member complained), so
probably not needed.
0004 Assert that the original and incrementalized versions give the
same answer. This requires the length to be known up front.
0005 Demo with pgstat_hash_hash_key, which currently runs 3 finalizers
joined with hash_combine. Might shave a few cycles.
0006 Add bytewise interface for C strings.
0007 Use it in guc_name_hash
0008 Teach guc_name_cmp to case fold lazily
I'll test these two and see if there's a detectable difference. Then
each of these:
0009 Jeff's conversion to simplehash
0010 Use an inline equality function for guc name hash
0011/12 An experiment to push case-folding down inside fasthash. It's
not great looking, but I'm curious if it makes a difference.
0013 Get rid of strlen in dynahash with default string hashing. I'll
hold on to this and start a new thread, because it's off-topic and has
some open questions.
I haven't tested yet, but I want to see what CI thinks.
I tested using the new hash function APIs for my search path cache, and
there's a significant speedup for cases not benefiting from a86c61c9ee.
It's enough that we almost don't need a86c61c9ee. So a definite +1 to
the new APIs.
Do you have a new test?
Attachments:
v6-0001-Vendor-fasthash.patchtext/x-patch; charset=US-ASCII; name=v6-0001-Vendor-fasthash.patchDownload
From eca956b55c71a11f39497b60469f13642819c349 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v6 01/13] Vendor fasthash
MIT licensed
---
src/include/common/hashfn_unstable.h | 80 ++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..76ed27c0a0
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,80 @@
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#include "fasthash.h"
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+static inline uint64_t mix(uint64_t h) {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix.
+// objsize: 0-1fd: 509
+uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t *pos = (const uint64_t *)buf;
+ const uint64_t *end = pos + (len / 8);
+ const unsigned char *pos2;
+ uint64_t h = seed ^ (len * m);
+ uint64_t v;
+
+ while (pos != end) {
+ v = *pos++;
+ h ^= mix(v);
+ h *= m;
+ }
+
+ pos2 = (const unsigned char*)pos;
+ v = 0;
+
+ switch (len & 7) {
+ case 7: v ^= (uint64_t)pos2[6] << 48;
+ case 6: v ^= (uint64_t)pos2[5] << 40;
+ case 5: v ^= (uint64_t)pos2[4] << 32;
+ case 4: v ^= (uint64_t)pos2[3] << 24;
+ case 3: v ^= (uint64_t)pos2[2] << 16;
+ case 2: v ^= (uint64_t)pos2[1] << 8;
+ case 1: v ^= (uint64_t)pos2[0];
+ h ^= mix(v);
+ h *= m;
+ }
+
+ return mix(h);
+}
+
+// objsize: 0-236: 566
+uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64_t h = fasthash64(buf, len, seed);
+ return h - (h >> 32);
+}
--
2.43.0
v6-0003-Add-UINT64CONST-not-sure-when-we-actually-need-th.patchtext/x-patch; charset=US-ASCII; name=v6-0003-Add-UINT64CONST-not-sure-when-we-actually-need-th.patchDownload
From 4b2080bee986da18077fe2055558e49ab7752086 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:14:04 +0700
Subject: [PATCH v6 03/13] Add UINT64CONST (not sure when we actually need
that)
fasthash*_orig left alone for now.
---
src/include/common/hashfn_unstable.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index e5288ae723..b278c72f90 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -56,7 +56,7 @@ static inline uint64
fasthash_mix(uint64 h)
{
h ^= h >> 23;
- h *= 0x2127599bf4325c37ULL;
+ h *= UINT64CONST(0x2127599bf4325c37);
h ^= h >> 47;
return h;
}
@@ -65,7 +65,7 @@ static inline void
fasthash_combine(fasthash_state* hs)
{
hs->hash ^= fasthash_mix(hs->accum);
- hs->hash *= 0x880355f21e6d1965ULL;
+ hs->hash *= UINT64CONST(0x880355f21e6d1965);
/* reset hash state for next input */
hs->accum = 0;
@@ -79,7 +79,7 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
// since we don't know the length for a nul-terminated string
// handle some other way -- maybe we can accum the length in
// the state and fold it in during the finalizer (cf. xxHash3)
- hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+ hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
}
static inline void
--
2.43.0
v6-0004-Assert-that-the-incremental-fasthash-variants-giv.patchtext/x-patch; charset=US-ASCII; name=v6-0004-Assert-that-the-incremental-fasthash-variants-giv.patchDownload
From 31c9e4161b5d0b96a939048991a838a74cb559f3 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:22:35 +0700
Subject: [PATCH v6 04/13] Assert that the incremental fasthash variants give
the same answer as the original
---
src/common/hashfn.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 2490607eea..37c8d307c7 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -26,6 +26,7 @@
#include "common/hashfn.h"
#include "port/pg_bitutils.h"
+#include "common/hashfn_unstable.h"
/*
* This hash function was written by Bob Jenkins
@@ -150,6 +151,9 @@ hash_bytes(const unsigned char *k, int keylen)
c,
len;
+ // XXX not for commit
+ Assert(fasthash64_orig((void *) k, keylen, 0) == fasthash64(k, keylen, 0));
+
/* Set up the internal state */
len = keylen;
a = b = c = 0x9e3779b9 + len + 3923095;
--
2.43.0
v6-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchtext/x-patch; charset=US-ASCII; name=v6-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchDownload
From ec51e2e7b60e305020792d2608f677d263094a8f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 12:39:45 +0700
Subject: [PATCH v6 02/13] Rewrite fasthash functions using a homegrown
incremental interface
This serves as a model for correct use of the interface.
---
src/include/common/hashfn_unstable.h | 154 +++++++++++++++++++++++++--
1 file changed, 147 insertions(+), 7 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 76ed27c0a0..e5288ae723 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -1,3 +1,25 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must have
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.c
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
/* The MIT License
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
@@ -23,7 +45,118 @@
SOFTWARE.
*/
-#include "fasthash.h"
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 hash;
+} fasthash_state;
+
+static inline uint64
+fasthash_mix(uint64 h)
+{
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state* hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum);
+ hs->hash *= 0x880355f21e6d1965ULL;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+
+ // since we don't know the length for a nul-terminated string
+ // handle some other way -- maybe we can accum the length in
+ // the state and fold it in during the finalizer (cf. xxHash3)
+ hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+}
+
+static inline void
+fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8: memcpy(&hs->accum, k, 8);
+ break;
+ case 7: hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6: hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5: hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4: hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3: hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2: hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1: hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+
+static inline uint64
+fasthash_final64(fasthash_state *hs)
+{
+ return fasthash_mix(hs->hash);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64 h = fasthash_final64(hs);
+ return h - (h >> 32);
+}
+
+static inline uint64
+fasthash64(const unsigned char * k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs);
+}
+
+static inline uint64
+fasthash32(const unsigned char * k, int len, uint64 seed)
+{
+ uint64 h = fasthash64(k, len, seed);
+ return h - (h >> 32);
+}
+
+
+// XXX NOT FOR COMMIT
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
@@ -34,9 +167,8 @@ static inline uint64_t mix(uint64_t h) {
return h;
}
-// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix.
-// objsize: 0-1fd: 509
-uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+static inline
+uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
{
const uint64_t m = 0x880355f21e6d1965ULL;
const uint64_t *pos = (const uint64_t *)buf;
@@ -56,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
switch (len & 7) {
case 7: v ^= (uint64_t)pos2[6] << 48;
+ /* FALLTHROUGH */
case 6: v ^= (uint64_t)pos2[5] << 40;
+ /* FALLTHROUGH */
case 5: v ^= (uint64_t)pos2[4] << 32;
+ /* FALLTHROUGH */
case 4: v ^= (uint64_t)pos2[3] << 24;
+ /* FALLTHROUGH */
case 3: v ^= (uint64_t)pos2[2] << 16;
+ /* FALLTHROUGH */
case 2: v ^= (uint64_t)pos2[1] << 8;
+ /* FALLTHROUGH */
case 1: v ^= (uint64_t)pos2[0];
h ^= mix(v);
h *= m;
@@ -69,12 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
return mix(h);
}
-// objsize: 0-236: 566
-uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+static inline
+uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64_t h = fasthash64(buf, len, seed);
+ uint64_t h = fasthash64_orig(buf, len, seed);
return h - (h >> 32);
}
+
+#endif /* HASHFN_UNSTABLE_H */
--
2.43.0
v6-0005-Demonstrate-fasthash32-with-pgstat_hash_hash_key.patchtext/x-patch; charset=US-ASCII; name=v6-0005-Demonstrate-fasthash32-with-pgstat_hash_hash_key.patchDownload
From d62b880581c852f9a3c515d1f546408255da1f8e Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v6 05/13] Demonstrate fasthash32 with pgstat_hash_hash_key
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..df310efee1 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const unsigned char *) key, size, 0);
}
/*
--
2.43.0
v6-0007-Use-bytewise-fasthash-in-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v6-0007-Use-bytewise-fasthash-in-guc_name_hash.patchDownload
From fa57dcebf84bdcd16dd057e1515ef380ac3c9058 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:19:10 +0700
Subject: [PATCH v6 07/13] Use bytewise fasthash in guc_name_hash
---
src/backend/utils/misc/guc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e76c083003..053be81d14 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn_unstable.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1324,22 +1325,21 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const void *key, Size keysize)
{
- uint32 result = 0;
const char *name = *(const char *const *) key;
+ fasthash_state hs;
+
+ fasthash_init(&hs, 0, 0);
while (*name)
{
char ch = *name++;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ /* quick and dirty casefolding suitable for hashing */
+ ch |= 0x20;
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
+ fasthash_accum_byte(&hs, (unsigned char) ch);
}
- return result;
+ return fasthash_final32(&hs);
}
/*
--
2.43.0
v6-0006-Add-bytewise-interface.patchtext/x-patch; charset=US-ASCII; name=v6-0006-Add-bytewise-interface.patchDownload
From 09338d0399008e284a69189601130534936fbc6f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:32:05 +0700
Subject: [PATCH v6 06/13] Add bytewise interface
This is useful for hashing values with unknown length,
like NUL-terminated strings. It should be faster than calling
strlen() first and passing the length, which most hash
functions require.
Note: This method can't give the same answer as
regular fasthash, so it will need to be evaluated. It's possible
we need to mix in the length at the finalization step (at which
time can know the length), in order to safeguard against
collisions.
---
src/include/common/hashfn_unstable.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index b278c72f90..3c3690d063 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -49,6 +49,7 @@ typedef struct fasthash_state
{
uint64 accum;
#define FH_SIZEOF_ACCUM sizeof(uint64)
+ int8 accum_len;
uint64 hash;
} fasthash_state;
@@ -69,6 +70,7 @@ fasthash_combine(fasthash_state* hs)
/* reset hash state for next input */
hs->accum = 0;
+ hs->accum_len = 0;
}
static inline void
@@ -82,6 +84,18 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
}
+static inline void
+fasthash_accum_byte(fasthash_state *hs, const unsigned char ch)
+{
+ hs->accum <<= BITS_PER_BYTE;
+ hs->accum |= ch;
+ hs->accum_len++;
+
+ // wip: is there a better way to get sizeof struct member?
+ if (hs->accum_len == sizeof(((fasthash_state *) 0)->accum))
+ fasthash_combine(hs);
+}
+
static inline void
fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
{
@@ -117,6 +131,11 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
static inline uint64
fasthash_final64(fasthash_state *hs)
{
+ // check for remaining bytes to combine into hash
+ // should only be used by the bytewise interface
+ if (hs->accum_len > 0)
+ fasthash_combine(hs);
+
return fasthash_mix(hs->hash);
}
--
2.43.0
v6-0009-Convert-GUC-hashtable-to-use-simplehash.patchtext/x-patch; charset=US-ASCII; name=v6-0009-Convert-GUC-hashtable-to-use-simplehash.patchDownload
From 504ff916c531c417a20625148807b439a4cc8527 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 2 Aug 2023 23:04:06 -0700
Subject: [PATCH v6 09/13] Convert GUC hashtable to use simplehash.
---
src/backend/utils/misc/guc.c | 147 ++++++++++++++---------------------
1 file changed, 59 insertions(+), 88 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1484e11a42..1d5d144c41 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -203,9 +203,10 @@ typedef struct
{
const char *gucname; /* hash key */
struct config_generic *gucvar; /* -> GUC's defining structure */
-} GUCHashEntry;
-static HTAB *guc_hashtab; /* entries are GUCHashEntrys */
+ /* needed by simplehash */
+ char status;
+} GUCHashEntry;
/*
* In addition to the hash table, variables having certain properties are
@@ -228,8 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int guc_name_match(const void *key1, const void *key2, Size keysize);
+static inline uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -266,6 +266,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+#define SH_PREFIX GUCHash
+#define SH_ELEMENT_TYPE GUCHashEntry
+#define SH_KEY_TYPE const char *
+#define SH_KEY gucname
+#define SH_HASH_KEY(tb, key) guc_name_hash(key)
+#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL; /* entries are GUCHashEntrys */
/*
* This function handles both actual config file (re)loads and execution of
@@ -283,7 +295,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
ConfigVariable *item,
*head,
*tail;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/* Parse the main config file into a list of option names and values */
@@ -359,8 +371,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* need this so that we can tell below which ones have been removed from
* the file since we last processed it.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
@@ -446,8 +458,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* boot-time defaults. If such a variable can't be changed after startup,
* report that and continue.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
GucStack *stack;
@@ -868,17 +880,17 @@ struct config_generic **
get_guc_variables(int *num_vars)
{
struct config_generic **result;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
int i;
- *num_vars = hash_get_num_entries(guc_hashtab);
+ *num_vars = guc_hashtab->members;
result = palloc(sizeof(struct config_generic *) * *num_vars);
/* Extract pointers from the hash table */
i = 0;
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
result[i++] = hentry->gucvar;
Assert(i == *num_vars);
@@ -900,7 +912,6 @@ build_guc_variables(void)
{
int size_vars;
int num_vars = 0;
- HASHCTL hash_ctl;
GUCHashEntry *hentry;
bool found;
int i;
@@ -962,24 +973,14 @@ build_guc_variables(void)
*/
size_vars = num_vars + num_vars / 4;
- hash_ctl.keysize = sizeof(char *);
- hash_ctl.entrysize = sizeof(GUCHashEntry);
- hash_ctl.hash = guc_name_hash;
- hash_ctl.match = guc_name_match;
- hash_ctl.hcxt = GUCMemoryContext;
- guc_hashtab = hash_create("GUC hash table",
- size_vars,
- &hash_ctl,
- HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+ guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
for (i = 0; ConfigureNamesBool[i].gen.name; i++)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -988,10 +989,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1000,10 +999,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1012,10 +1009,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1024,15 +1019,13 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
- Assert(num_vars == hash_get_num_entries(guc_hashtab));
+ Assert(num_vars == guc_hashtab->members);
}
/*
@@ -1045,10 +1038,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &var->name,
- HASH_ENTER_NULL,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
if (unlikely(hentry == NULL))
{
ereport(elevel,
@@ -1237,10 +1228,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
Assert(name);
/* Look it up using the hash table. */
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry)
return hentry->gucvar;
@@ -1326,39 +1315,25 @@ guc_name_compare(const char *namea, const char *nameb)
/*
* Hash function that's compatible with guc_name_compare
*/
-static uint32
-guc_name_hash(const void *key, Size keysize)
+static inline uint32
+guc_name_hash(const char *name)
{
- const char *name = *(const char *const *) key;
fasthash_state hs;
fasthash_init(&hs, 0, 0);
while (*name)
{
- char ch = *name++;
+ unsigned char ch = *name++;
/* quick and dirty casefolding suitable for hashing */
ch |= 0x20;
- fasthash_accum_byte(&hs, (unsigned char) ch);
+ fasthash_accum_byte(&hs, ch);
}
return fasthash_final32(&hs);
}
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
- const char *name1 = *(const char *const *) key1;
- const char *name2 = *(const char *const *) key2;
-
- return guc_name_compare(name1, name2);
-}
-
-
/*
* Convert a GUC name to the form that should be used in pg_parameter_acl.
*
@@ -1528,7 +1503,7 @@ check_GUC_init(struct config_generic *gconf)
void
InitializeGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -1546,8 +1521,8 @@ InitializeGUCOptions(void)
* Load all variables with their compiled-in defaults, and initialize
* status fields as needed.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
/* Check mapping between initial and default value */
Assert(check_GUC_init(hentry->gucvar));
@@ -2532,7 +2507,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
void
BeginReportingGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -2556,8 +2531,8 @@ BeginReportingGUCOptions(void)
PGC_INTERNAL, PGC_S_OVERRIDE);
/* Transmit initial values of interesting variables */
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *conf = hentry->gucvar;
@@ -4813,10 +4788,8 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry == NULL)
{
/*
@@ -5152,7 +5125,7 @@ void
MarkGUCPrefixReserved(const char *className)
{
int classLen = strlen(className);
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
MemoryContext oldcontext;
@@ -5162,8 +5135,8 @@ MarkGUCPrefixReserved(const char *className)
* don't bother trying to free associated memory, since this shouldn't
* happen often.)
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *var = hentry->gucvar;
@@ -5178,10 +5151,8 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- hash_search(guc_hashtab,
- &var->name,
- HASH_REMOVE,
- NULL);
+ GUCHash_delete(guc_hashtab, var->name);
+
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
}
@@ -5212,7 +5183,7 @@ get_explain_guc_options(int *num)
* While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
* it doesn't seem worth dynamically resizing this array.
*/
- result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+ result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
/* We need only consider GUCs with source not PGC_S_DEFAULT */
dlist_foreach(iter, &guc_nondef_list)
--
2.43.0
v6-0008-Casefold-lazily-in-guc_name_compare.patchtext/x-patch; charset=US-ASCII; name=v6-0008-Casefold-lazily-in-guc_name_compare.patchDownload
From 2ae11cf0a491bbe264830fb77f56affee98dac99 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:30:39 +0700
Subject: [PATCH v6 08/13] Casefold lazily in guc_name_compare
---
src/backend/utils/misc/guc.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 053be81d14..1484e11a42 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1305,12 +1305,16 @@ guc_name_compare(const char *namea, const char *nameb)
char cha = *namea++;
char chb = *nameb++;
- if (cha >= 'A' && cha <= 'Z')
- cha += 'a' - 'A';
- if (chb >= 'A' && chb <= 'Z')
- chb += 'a' - 'A';
if (cha != chb)
- return cha - chb;
+ {
+ /* Casefold lazily since we expect lower case */
+ if (cha >= 'A' && cha <= 'Z')
+ cha += 'a' - 'A';
+ if (chb >= 'A' && chb <= 'Z')
+ chb += 'a' - 'A';
+ if (cha != chb)
+ return cha - chb;
+ }
}
if (*namea)
return 1; /* a is longer */
--
2.43.0
v6-0010-Use-inline-equality-function-for-guc-hash.patchtext/x-patch; charset=US-ASCII; name=v6-0010-Use-inline-equality-function-for-guc-hash.patchDownload
From a0f2434bd34d3bce5b6f6c144a7c16cde62111c2 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:54:13 +0700
Subject: [PATCH v6 10/13] Use inline equality function for guc hash
---
src/backend/utils/misc/guc.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1d5d144c41..46591172fd 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -229,6 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
+static inline bool guc_name_eq(const char *namea, const char *nameb);
static inline uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
@@ -271,7 +272,7 @@ static bool call_enum_check_hook(struct config_enum *conf, int *newval,
#define SH_KEY_TYPE const char *
#define SH_KEY gucname
#define SH_HASH_KEY(tb, key) guc_name_hash(key)
-#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b) (guc_name_eq(a, b))
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
@@ -1312,6 +1313,36 @@ guc_name_compare(const char *namea, const char *nameb)
return 0;
}
+static inline bool
+guc_name_eq(const char *namea, const char *nameb)
+{
+ char cha;
+ char chb;
+
+ while (*namea && *nameb)
+ {
+ cha = *namea++;
+ chb = *nameb++;
+
+ if (cha != chb)
+ {
+ /* Casefold lazily since we expect lower case */
+ if (cha >= 'A' && cha <= 'Z')
+ cha += 'a' - 'A';
+ if (chb >= 'A' && chb <= 'Z')
+ chb += 'a' - 'A';
+
+ if (cha != chb)
+ return false;
+ }
+ }
+
+ if (*namea == *nameb)
+ return true;
+ else
+ return false;
+}
+
/*
* Hash function that's compatible with guc_name_compare
*/
--
2.43.0
v6-0013-PoC-Get-rid-of-strlen-calls-when-using-HASH_STRIN.patchtext/x-patch; charset=US-ASCII; name=v6-0013-PoC-Get-rid-of-strlen-calls-when-using-HASH_STRIN.patchDownload
From 0da8162ca2d28f5aa89ceb257ad3c2dd2a317dff Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 18:06:05 +0700
Subject: [PATCH v6 13/13] PoC: Get rid of strlen() calls when using
HASH_STRINGS
Add cstring_hash, which uses the chunked incremental interface
of fasthash. That way, we don't need to know the length of the
key upfront.
Open questions:
- Is performance better?
- Since we have the total length when we reach the end, should
we try to use it in the finalization stage?
- Do we need to keep string_hash around?
---
src/backend/utils/hash/dynahash.c | 49 +++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 012d4a0b1f..ba74126e73 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -307,6 +308,44 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * cstring_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static uint32
+cstring_hash(const void *key, Size keysize)
+{
+ fasthash_state hs;
+ int s_len = 0;
+ const unsigned char *k = (const unsigned char *) key;
+
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+
+ fasthash_init(&hs, 0, 0, false);
+
+ while (*k && s_len < keysize)
+ {
+ int chunk_len;
+
+ for (chunk_len = 0;
+ chunk_len < FH_SIZEOF_ACCUM && k[chunk_len] != '\0' && s_len < keysize;
+ chunk_len++)
+ {
+ s_len++;
+ }
+
+ fasthash_accum(&hs, k, chunk_len);
+ k += chunk_len;
+ }
+
+ return fasthash_final32(&hs);
+}
+
/************************** CREATE ROUTINES **********************/
@@ -419,7 +458,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
{
/*
* string_hash used to be considered the default hash method, and in a
- * non-assert build it effectively still is. But we now consider it
+ * non-assert build it effectively still was until version 17. Since version 14 we consider it
* an assertion error to not say HASH_STRINGS explicitly. To help
* catch mistaken usage of HASH_STRINGS, we also insist on a
* reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -428,12 +467,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Assert(flags & HASH_STRINGS);
Assert(info->keysize > 8);
- hashp->hash = string_hash;
+ hashp->hash = cstring_hash;
}
/*
* If you don't specify a match function, it defaults to string_compare if
- * you used string_hash, and to memcmp otherwise.
+ * you used cstring_hash, and to memcmp otherwise.
*
* Note: explicitly specifying string_hash is deprecated, because this
* might not work for callers in loadable modules on some platforms due to
@@ -442,7 +481,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == cstring_hash)
hashp->match = (HashCompareFunc) string_compare;
else
hashp->match = memcmp;
@@ -452,7 +491,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_KEYCOPY)
hashp->keycopy = info->keycopy;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == cstring_hash)
{
/*
* The signature of keycopy is meant for memcpy(), which returns
--
2.43.0
v6-0011-Add-abiliy-to-case-fold-with-chunk-8-byte-interfa.patchtext/x-patch; charset=US-ASCII; name=v6-0011-Add-abiliy-to-case-fold-with-chunk-8-byte-interfa.patchDownload
From f8200a0c75abc946eba85f55fa6d111d3d632b88 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:58:59 +0700
Subject: [PATCH v6 11/13] Add abiliy to case-fold with chunk (8-byte)
interface
---
src/include/common/hashfn_unstable.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 3c3690d063..6a1f40e70b 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -49,6 +49,7 @@ typedef struct fasthash_state
{
uint64 accum;
#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 overlay; /* used for case-folding */
int8 accum_len;
uint64 hash;
} fasthash_state;
@@ -74,7 +75,7 @@ fasthash_combine(fasthash_state* hs)
}
static inline void
-fasthash_init(fasthash_state *hs, int len, uint64 seed)
+fasthash_init(fasthash_state *hs, int len, uint64 seed, bool case_fold)
{
memset(hs, 0, sizeof(fasthash_state));
@@ -82,6 +83,9 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
// handle some other way -- maybe we can accum the length in
// the state and fold it in during the finalizer (cf. xxHash3)
hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
+
+ if (case_fold)
+ hs->overlay = UINT64CONST(0x2020202020202020);
}
static inline void
@@ -123,6 +127,8 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
case 0:
return;
}
+ /* case-fold, if set */
+ hs->accum |= hs->overlay;
fasthash_combine(hs);
}
@@ -154,7 +160,7 @@ fasthash64(const unsigned char * k, int len, uint64 seed)
{
fasthash_state hs;
- fasthash_init(&hs, len, seed);
+ fasthash_init(&hs, len, seed, false);
while (len >= FH_SIZEOF_ACCUM)
{
--
2.43.0
v6-0012-Use-chunk-interface-for-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v6-0012-Use-chunk-interface-for-guc_name_hash.patchDownload
From a96ec013abdd79f2e061fddaa49291f2f16f5e65 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 18:05:27 +0700
Subject: [PATCH v6 12/13] Use chunk interface for guc_name_hash
---
src/backend/utils/misc/guc.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 46591172fd..507d35718f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1351,16 +1351,19 @@ guc_name_hash(const char *name)
{
fasthash_state hs;
- fasthash_init(&hs, 0, 0);
+ fasthash_init(&hs, 0, 0, true);
while (*name)
{
- unsigned char ch = *name++;
+ int chunk_len;
- /* quick and dirty casefolding suitable for hashing */
- ch |= 0x20;
+ for (chunk_len = 0;
+ chunk_len < FH_SIZEOF_ACCUM && name[chunk_len] != '\0';
+ chunk_len++)
+ ;
- fasthash_accum_byte(&hs, ch);
+ fasthash_accum(&hs, (const unsigned char *) name, chunk_len);
+ name += chunk_len;
}
return fasthash_final32(&hs);
}
--
2.43.0
On Sat, 2023-12-09 at 18:52 +0700, John Naylor wrote:
I tested using the new hash function APIs for my search path cache, and
there's a significant speedup for cases not benefiting from a86c61c9ee.
It's enough that we almost don't need a86c61c9ee. So a definite +1 to
the new APIs.
Do you have a new test?
Still using the same basic test here:
/messages/by-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel@j-davis.com
What I did is:
a. add your v5 patches
b. disable optimization in a86c61c9ee
c. add attached patch to use new hash APIs
I got a slowdown between (a) and (b), and then (c) closed the gap about
halfway. It started to get close to test noise at that point -- I could
get some better numbers out of it if it's helpful.
Also, what I'm doing in the attached path is using part of the key as
the seed. Is that a good idea or should the seed be zero or come from
somewhere else?
Regards,
Jeff Davis
Attachments:
v1-0001-Use-new-hash-APIs-for-search-path-cache.patchtext/x-patch; charset=UTF-8; name=v1-0001-Use-new-hash-APIs-for-search-path-cache.patchDownload
From a30e5f0ea580fb5038eb90e862f697b557627f32 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 8 Dec 2023 12:14:27 -0800
Subject: [PATCH v1] Use new hash APIs for search path cache
---
src/backend/catalog/namespace.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..af815a889d 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -42,6 +42,7 @@
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +248,18 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ const char *buf = key.searchPath;
+ fasthash64_state hs;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ fasthash64_init(&hs, key.roleid);
+
+ while (*buf)
+ {
+ fasthash64_accum_byte(&hs, *buf);
+ buf++;
+ }
+
+ return fasthash64_final32(&hs);
}
static inline bool
--
2.34.1
On Sun, Dec 10, 2023 at 2:18 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Sat, 2023-12-09 at 18:52 +0700, John Naylor wrote:
I tested using the new hash function APIs for my search path cache, and
there's a significant speedup for cases not benefiting from a86c61c9ee.
It's enough that we almost don't need a86c61c9ee. So a definite +1 to
the new APIs.
Interesting, thanks for testing! SearchPathCache is a better starting
point than dynahash for removing strlen calls anyway -- it's more
localized, uses simplehash, and we can test it with at-hand tests.
Do you have a new test?
Still using the same basic test here:
/messages/by-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel@j-davis.com
What I did is:
a. add your v5 patches
b. disable optimization in a86c61c9ee
c. add attached patch to use new hash APIs
Of course, the CF bot doesn't know this, so it crashed and burned
before I had a chance to check how v6 did. I'm attaching v7 which just
improves commit messages for reviewing, and gets rid of git whitespace
errors.
My local branch of master is still at 457428d9e99b6 from Dec 4. That's
before both a86c61c9ee (Optimize SearchPathCache by saving the last
entry.) and 867dd2dc87 (Cache opaque handle for GUC option to avoid
repeasted lookups.). My plan was to keep testing against Dec. 4, but
like you I'm not sure if there is a better GUC test to do now.
I got a slowdown between (a) and (b), and then (c) closed the gap about
halfway. It started to get close to test noise at that point -- I could
get some better numbers out of it if it's helpful.
We can also try (c) using the "chunked" interface. Also note your
patch may no longer apply on top of v6 or v7.
Also, what I'm doing in the attached path is using part of the key as
the seed. Is that a good idea or should the seed be zero or come from
somewhere else?
I think whether to use part of the key as a seed is a judgment call.
See this part in resowner.c:
/*
* Most resource kinds store a pointer in 'value', and pointers are unique
* all on their own. But some resources store plain integers (Files and
* Buffers as of this writing), so we want to incorporate the 'kind' in
* the hash too, otherwise those resources will collide a lot. But
* because there are only a few resource kinds like that - and only a few
* resource kinds to begin with - we don't need to work too hard to mix
* 'kind' into the hash. Just add it with hash_combine(), it perturbs the
* result enough for our purposes.
*/
#if SIZEOF_DATUM == 8
return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
Given these comments, I'd feel free to use the "kind" as the seed if I
were writing this with fasthash.
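A sketch of what that might look like with the incremental interface
from this thread -- the function name and parameter types here are
hypothetical, only the fasthash_* calls come from the patches:

static inline uint32
resource_hash_sketch(uint64 value, uint64 kind)
{
	fasthash_state hs;

	/* mix 'kind' in as the seed instead of via hash_combine */
	fasthash_init(&hs, sizeof(value), kind);
	fasthash_accum(&hs, (const unsigned char *) &value, sizeof(value));
	return fasthash_final32(&hs);
}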
The caller-provided seed can probably be zero unless we have a good
reason to, like the above, but with the incremental interface there is
an issue:
hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
Passing length 0 will wipe out the internal seed here, and that can't be good.
1) We could by convention pass "1" as the length for strings. That
could be a macro like
#define FH_UNKNOWN_LENGTH 1
...and maybe Assert(len != 0 || seed != 0)
Or 2) we could detect zero and force it to be one, but it's best if
the compiler can always constant-fold that branch. Future work may
invalidate that assumption.
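To make option 1 concrete, a sketch -- the macro and Assert exist only
in this email, not in any attached patch; the rest of the body is from
v6-0002/0003:

#define FH_UNKNOWN_LENGTH 1

static inline void
fasthash_init(fasthash_state *hs, int len, uint64 seed)
{
	/* len == 0 would cancel the seed mixing below */
	Assert(len != 0 || seed != 0);

	memset(hs, 0, sizeof(fasthash_state));
	hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
}

A string caller that can't know the length up front would then pass
fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed).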
Attachments:
v7-0011-Add-abiliy-to-case-fold-with-chunk-8-byte-interfa.patchtext/x-patch; charset=US-ASCII; name=v7-0011-Add-abiliy-to-case-fold-with-chunk-8-byte-interfa.patchDownload
From b1c63cddb988b6086486cfb739c07d33d872cafe Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:58:59 +0700
Subject: [PATCH v7 11/13] Add abiliy to case-fold with chunk (8-byte)
interface
---
src/include/common/hashfn_unstable.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a798c42ba7..6157124cb4 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -49,6 +49,7 @@ typedef struct fasthash_state
{
uint64 accum;
#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 overlay; /* used for case-folding */
int8 accum_len;
uint64 hash;
} fasthash_state;
@@ -74,7 +75,7 @@ fasthash_combine(fasthash_state* hs)
}
static inline void
-fasthash_init(fasthash_state *hs, int len, uint64 seed)
+fasthash_init(fasthash_state *hs, int len, uint64 seed, bool case_fold)
{
memset(hs, 0, sizeof(fasthash_state));
@@ -82,6 +83,9 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
// handle some other way -- maybe we can accum the length in
// the state and fold it in during the finalizer (cf. xxHash3)
hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
+
+ if (case_fold)
+ hs->overlay = UINT64CONST(0x2020202020202020);
}
static inline void
@@ -123,6 +127,8 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
case 0:
return;
}
+ /* case-fold, if set */
+ hs->accum |= hs->overlay;
fasthash_combine(hs);
}
@@ -154,7 +160,7 @@ fasthash64(const unsigned char * k, int len, uint64 seed)
{
fasthash_state hs;
- fasthash_init(&hs, len, seed);
+ fasthash_init(&hs, len, seed, false);
while (len >= FH_SIZEOF_ACCUM)
{
--
2.43.0
v7-0012-Use-chunk-interface-for-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v7-0012-Use-chunk-interface-for-guc_name_hash.patchDownload
From ca3030cc73310c728ef973b31f90ba99f0ae1745 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 18:05:27 +0700
Subject: [PATCH v7 12/13] Use chunk interface for guc_name_hash
---
src/backend/utils/misc/guc.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 46591172fd..507d35718f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1351,16 +1351,19 @@ guc_name_hash(const char *name)
{
fasthash_state hs;
- fasthash_init(&hs, 0, 0);
+ fasthash_init(&hs, 0, 0, true);
while (*name)
{
- unsigned char ch = *name++;
+ int chunk_len;
- /* quick and dirty casefolding suitable for hashing */
- ch |= 0x20;
+ for (chunk_len = 0;
+ chunk_len < FH_SIZEOF_ACCUM && name[chunk_len] != '\0';
+ chunk_len++)
+ ;
- fasthash_accum_byte(&hs, ch);
+ fasthash_accum(&hs, (const unsigned char *) name, chunk_len);
+ name += chunk_len;
}
return fasthash_final32(&hs);
}
--
2.43.0
v7-0013-PoC-Get-rid-of-strlen-calls-when-using-HASH_STRIN.patchtext/x-patch; charset=US-ASCII; name=v7-0013-PoC-Get-rid-of-strlen-calls-when-using-HASH_STRIN.patchDownload
From b7ee120e05d48ebacb078e00ffefc5f98052d214 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 18:06:05 +0700
Subject: [PATCH v7 13/13] PoC: Get rid of strlen() calls when using
HASH_STRINGS
Add cstring_hash, which uses the chunked incremental interface
of fasthash. That way, we don't need to know the length of the
key upfront.
Open questions:
- Is performance better?
- Since we have the total length when we reach the end, should
we try to use it in the finalization stage?
- Do we need to keep string_hash around?
---
src/backend/utils/hash/dynahash.c | 49 +++++++++++++++++++++++++++----
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 012d4a0b1f..ba74126e73 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -307,6 +308,44 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * cstring_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static uint32
+cstring_hash(const void *key, Size keysize)
+{
+ fasthash_state hs;
+ int s_len = 0;
+ const unsigned char *k = (const unsigned char *) key;
+
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+
+ fasthash_init(&hs, 0, 0, false);
+
+ while (*k && s_len < keysize)
+ {
+ int chunk_len;
+
+ for (chunk_len = 0;
+ chunk_len < FH_SIZEOF_ACCUM && k[chunk_len] != '\0' && s_len < keysize;
+ chunk_len++)
+ {
+ s_len++;
+ }
+
+ fasthash_accum(&hs, k, chunk_len);
+ k += chunk_len;
+ }
+
+ return fasthash_final32(&hs);
+}
+
/************************** CREATE ROUTINES **********************/
@@ -419,7 +458,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
{
/*
* string_hash used to be considered the default hash method, and in a
- * non-assert build it effectively still is. But we now consider it
+ * non-assert build it effectively still was until version 17. Since version 14 we consider it
* an assertion error to not say HASH_STRINGS explicitly. To help
* catch mistaken usage of HASH_STRINGS, we also insist on a
* reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -428,12 +467,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Assert(flags & HASH_STRINGS);
Assert(info->keysize > 8);
- hashp->hash = string_hash;
+ hashp->hash = cstring_hash;
}
/*
* If you don't specify a match function, it defaults to string_compare if
- * you used string_hash, and to memcmp otherwise.
+ * you used cstring_hash, and to memcmp otherwise.
*
* Note: explicitly specifying string_hash is deprecated, because this
* might not work for callers in loadable modules on some platforms due to
@@ -442,7 +481,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == cstring_hash)
hashp->match = (HashCompareFunc) string_compare;
else
hashp->match = memcmp;
@@ -452,7 +491,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_KEYCOPY)
hashp->keycopy = info->keycopy;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == cstring_hash)
{
/*
* The signature of keycopy is meant for memcpy(), which returns
--
2.43.0
v7-0009-Convert-GUC-hashtable-to-use-simplehash.patchtext/x-patch; charset=US-ASCII; name=v7-0009-Convert-GUC-hashtable-to-use-simplehash.patchDownload
From f16778d29742d9c13bdc0e22c8c25e3b23e025ab Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Wed, 2 Aug 2023 23:04:06 -0700
Subject: [PATCH v7 09/13] Convert GUC hashtable to use simplehash.
---
src/backend/utils/misc/guc.c | 147 ++++++++++++++---------------------
1 file changed, 59 insertions(+), 88 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1484e11a42..1d5d144c41 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -203,9 +203,10 @@ typedef struct
{
const char *gucname; /* hash key */
struct config_generic *gucvar; /* -> GUC's defining structure */
-} GUCHashEntry;
-static HTAB *guc_hashtab; /* entries are GUCHashEntrys */
+ /* needed by simplehash */
+ char status;
+} GUCHashEntry;
/*
* In addition to the hash table, variables having certain properties are
@@ -228,8 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
-static uint32 guc_name_hash(const void *key, Size keysize);
-static int guc_name_match(const void *key1, const void *key2, Size keysize);
+static inline uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
static void RemoveGUCFromLists(struct config_generic *gconf);
@@ -266,6 +266,18 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+#define SH_PREFIX GUCHash
+#define SH_ELEMENT_TYPE GUCHashEntry
+#define SH_KEY_TYPE const char *
+#define SH_KEY gucname
+#define SH_HASH_KEY(tb, key) guc_name_hash(key)
+#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static GUCHash_hash *guc_hashtab = NULL; /* entries are GUCHashEntrys */
/*
* This function handles both actual config file (re)loads and execution of
@@ -283,7 +295,7 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
ConfigVariable *item,
*head,
*tail;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/* Parse the main config file into a list of option names and values */
@@ -359,8 +371,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* need this so that we can tell below which ones have been removed from
* the file since we last processed it.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
@@ -446,8 +458,8 @@ ProcessConfigFileInternal(GucContext context, bool applySettings, int elevel)
* boot-time defaults. If such a variable can't be changed after startup,
* report that and continue.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *gconf = hentry->gucvar;
GucStack *stack;
@@ -868,17 +880,17 @@ struct config_generic **
get_guc_variables(int *num_vars)
{
struct config_generic **result;
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
int i;
- *num_vars = hash_get_num_entries(guc_hashtab);
+ *num_vars = guc_hashtab->members;
result = palloc(sizeof(struct config_generic *) * *num_vars);
/* Extract pointers from the hash table */
i = 0;
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
result[i++] = hentry->gucvar;
Assert(i == *num_vars);
@@ -900,7 +912,6 @@ build_guc_variables(void)
{
int size_vars;
int num_vars = 0;
- HASHCTL hash_ctl;
GUCHashEntry *hentry;
bool found;
int i;
@@ -962,24 +973,14 @@ build_guc_variables(void)
*/
size_vars = num_vars + num_vars / 4;
- hash_ctl.keysize = sizeof(char *);
- hash_ctl.entrysize = sizeof(GUCHashEntry);
- hash_ctl.hash = guc_name_hash;
- hash_ctl.match = guc_name_match;
- hash_ctl.hcxt = GUCMemoryContext;
- guc_hashtab = hash_create("GUC hash table",
- size_vars,
- &hash_ctl,
- HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+ guc_hashtab = GUCHash_create(GUCMemoryContext, size_vars, NULL);
for (i = 0; ConfigureNamesBool[i].gen.name; i++)
{
struct config_generic *gucvar = &ConfigureNamesBool[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -988,10 +989,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesInt[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1000,10 +999,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesReal[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1012,10 +1009,8 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesString[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
@@ -1024,15 +1019,13 @@ build_guc_variables(void)
{
struct config_generic *gucvar = &ConfigureNamesEnum[i].gen;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &gucvar->name,
- HASH_ENTER,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, gucvar->name, &found);
+
Assert(!found);
hentry->gucvar = gucvar;
}
- Assert(num_vars == hash_get_num_entries(guc_hashtab));
+ Assert(num_vars == guc_hashtab->members);
}
/*
@@ -1045,10 +1038,8 @@ add_guc_variable(struct config_generic *var, int elevel)
GUCHashEntry *hentry;
bool found;
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &var->name,
- HASH_ENTER_NULL,
- &found);
+ hentry = GUCHash_insert(guc_hashtab, var->name, &found);
+
if (unlikely(hentry == NULL))
{
ereport(elevel,
@@ -1237,10 +1228,8 @@ find_option(const char *name, bool create_placeholders, bool skip_errors,
Assert(name);
/* Look it up using the hash table. */
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry)
return hentry->gucvar;
@@ -1326,39 +1315,25 @@ guc_name_compare(const char *namea, const char *nameb)
/*
* Hash function that's compatible with guc_name_compare
*/
-static uint32
-guc_name_hash(const void *key, Size keysize)
+static inline uint32
+guc_name_hash(const char *name)
{
- const char *name = *(const char *const *) key;
fasthash_state hs;
fasthash_init(&hs, 0, 0);
while (*name)
{
- char ch = *name++;
+ unsigned char ch = *name++;
/* quick and dirty casefolding suitable for hashing */
ch |= 0x20;
- fasthash_accum_byte(&hs, (unsigned char) ch);
+ fasthash_accum_byte(&hs, ch);
}
return fasthash_final32(&hs);
}
-/*
- * Dynahash match function to use in guc_hashtab
- */
-static int
-guc_name_match(const void *key1, const void *key2, Size keysize)
-{
- const char *name1 = *(const char *const *) key1;
- const char *name2 = *(const char *const *) key2;
-
- return guc_name_compare(name1, name2);
-}
-
-
/*
* Convert a GUC name to the form that should be used in pg_parameter_acl.
*
@@ -1528,7 +1503,7 @@ check_GUC_init(struct config_generic *gconf)
void
InitializeGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -1546,8 +1521,8 @@ InitializeGUCOptions(void)
* Load all variables with their compiled-in defaults, and initialize
* status fields as needed.
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
/* Check mapping between initial and default value */
Assert(check_GUC_init(hentry->gucvar));
@@ -2532,7 +2507,7 @@ AtEOXact_GUC(bool isCommit, int nestLevel)
void
BeginReportingGUCOptions(void)
{
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
/*
@@ -2556,8 +2531,8 @@ BeginReportingGUCOptions(void)
PGC_INTERNAL, PGC_S_OVERRIDE);
/* Transmit initial values of interesting variables */
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *conf = hentry->gucvar;
@@ -4813,10 +4788,8 @@ define_custom_variable(struct config_generic *variable)
/*
* See if there's a placeholder by the same name.
*/
- hentry = (GUCHashEntry *) hash_search(guc_hashtab,
- &name,
- HASH_FIND,
- NULL);
+ hentry = GUCHash_lookup(guc_hashtab, name);
+
if (hentry == NULL)
{
/*
@@ -5152,7 +5125,7 @@ void
MarkGUCPrefixReserved(const char *className)
{
int classLen = strlen(className);
- HASH_SEQ_STATUS status;
+ GUCHash_iterator iter;
GUCHashEntry *hentry;
MemoryContext oldcontext;
@@ -5162,8 +5135,8 @@ MarkGUCPrefixReserved(const char *className)
* don't bother trying to free associated memory, since this shouldn't
* happen often.)
*/
- hash_seq_init(&status, guc_hashtab);
- while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL)
+ GUCHash_start_iterate(guc_hashtab, &iter);
+ while ((hentry = GUCHash_iterate(guc_hashtab, &iter)) != NULL)
{
struct config_generic *var = hentry->gucvar;
@@ -5178,10 +5151,8 @@ MarkGUCPrefixReserved(const char *className)
errdetail("\"%s\" is now a reserved prefix.",
className)));
/* Remove it from the hash table */
- hash_search(guc_hashtab,
- &var->name,
- HASH_REMOVE,
- NULL);
+ GUCHash_delete(guc_hashtab, var->name);
+
/* Remove it from any lists it's in, too */
RemoveGUCFromLists(var);
}
@@ -5212,7 +5183,7 @@ get_explain_guc_options(int *num)
* While only a fraction of all the GUC variables are marked GUC_EXPLAIN,
* it doesn't seem worth dynamically resizing this array.
*/
- result = palloc(sizeof(struct config_generic *) * hash_get_num_entries(guc_hashtab));
+ result = palloc(sizeof(struct config_generic *) * guc_hashtab->members);
/* We need only consider GUCs with source not PGC_S_DEFAULT */
dlist_foreach(iter, &guc_nondef_list)
--
2.43.0
v7-0010-Use-inline-equality-function-for-guc-hash.patchtext/x-patch; charset=US-ASCII; name=v7-0010-Use-inline-equality-function-for-guc-hash.patchDownload
From f6a5341b06e5a17f7391b55cb453ca752f077e07 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:54:13 +0700
Subject: [PATCH v7 10/13] Use inline equality function for guc hash
---
src/backend/utils/misc/guc.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1d5d144c41..46591172fd 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -229,6 +229,7 @@ static int GUCNestLevel = 0; /* 1 when in main transaction */
static int guc_var_compare(const void *a, const void *b);
+static inline bool guc_name_eq(const char *namea, const char *nameb);
static inline uint32 guc_name_hash(const char *name);
static void InitializeGUCOptionsFromEnvironment(void);
static void InitializeOneGUCOption(struct config_generic *gconf);
@@ -271,7 +272,7 @@ static bool call_enum_check_hook(struct config_enum *conf, int *newval,
#define SH_KEY_TYPE const char *
#define SH_KEY gucname
#define SH_HASH_KEY(tb, key) guc_name_hash(key)
-#define SH_EQUAL(tb, a, b) (guc_name_compare(a, b) == 0)
+#define SH_EQUAL(tb, a, b) (guc_name_eq(a, b))
#define SH_SCOPE static inline
#define SH_DECLARE
#define SH_DEFINE
@@ -1312,6 +1313,36 @@ guc_name_compare(const char *namea, const char *nameb)
return 0;
}
+static inline bool
+guc_name_eq(const char *namea, const char *nameb)
+{
+ char cha;
+ char chb;
+
+ while (*namea && *nameb)
+ {
+ cha = *namea++;
+ chb = *nameb++;
+
+ if (cha != chb)
+ {
+ /* Casefold lazily since we expect lower case */
+ if (cha >= 'A' && cha <= 'Z')
+ cha += 'a' - 'A';
+ if (chb >= 'A' && chb <= 'Z')
+ chb += 'a' - 'A';
+
+ if (cha != chb)
+ return false;
+ }
+ }
+
+ if (*namea == *nameb)
+ return true;
+ else
+ return false;
+}
+
/*
* Hash function that's compatible with guc_name_compare
*/
--
2.43.0
v7-0006-Add-bytewise-interface.patchtext/x-patch; charset=US-ASCII; name=v7-0006-Add-bytewise-interface.patchDownload
From b413ee15d17f933039ad4eff1bd4aedea0f37d20 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:32:05 +0700
Subject: [PATCH v7 06/13] Add bytewise interface
This is useful for hashing values with unknown length,
like NUL-terminated strings. It should be faster than calling
strlen() first and passing the length, which most hash
functions require.
Note: This method can't give the same answer as
regular fasthash, so it will need to be evaluated. It's possible
we need to mix in the length at the finalization step (at which
time we know the length), in order to safeguard against
collisions.
---
src/include/common/hashfn_unstable.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 7ed1e5335a..a798c42ba7 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -49,6 +49,7 @@ typedef struct fasthash_state
{
uint64 accum;
#define FH_SIZEOF_ACCUM sizeof(uint64)
+ int8 accum_len;
uint64 hash;
} fasthash_state;
@@ -69,6 +70,7 @@ fasthash_combine(fasthash_state* hs)
/* reset hash state for next input */
hs->accum = 0;
+ hs->accum_len = 0;
}
static inline void
@@ -82,6 +84,18 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
}
+static inline void
+fasthash_accum_byte(fasthash_state *hs, const unsigned char ch)
+{
+ hs->accum <<= BITS_PER_BYTE;
+ hs->accum |= ch;
+ hs->accum_len++;
+
+ // wip: is there a better way to get sizeof struct member?
+ if (hs->accum_len == sizeof(((fasthash_state *) 0)->accum))
+ fasthash_combine(hs);
+}
+
static inline void
fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
{
@@ -117,6 +131,11 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
static inline uint64
fasthash_final64(fasthash_state *hs)
{
+ // check for remaining bytes to combine into hash
+ // should only be used by the bytewise interface
+ if (hs->accum_len > 0)
+ fasthash_combine(hs);
+
return fasthash_mix(hs->hash);
}
--
2.43.0
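As a quick illustration of the bytewise interface added above, here is a
minimal sketch of hashing a NUL-terminated string, mirroring what
guc_name_hash() does in the next patch. It assumes the definitions from
hashfn_unstable.h in this series; passing 0 as the init length is an
assumption borrowed from the GUC usage, since the real length isn't known
up front:

static inline uint32
hash_cstring_bytewise(const char *s)
{
	fasthash_state hs;

	/* length unknown up front, so init with 0 */
	fasthash_init(&hs, 0, 0);

	while (*s)
		fasthash_accum_byte(&hs, (unsigned char) *s++);

	return fasthash_final32(&hs);
}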
v7-0005-Demonstrate-fasthash32-with-pgstat_hash_hash_key.patchtext/x-patch; charset=US-ASCII; name=v7-0005-Demonstrate-fasthash32-with-pgstat_hash_hash_key.patchDownload
From 60dee25ccb0c904904bb057aa7c7adc8998a9cf0 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v7 05/13] Demonstrate fasthash32 with pgstat_hash_hash_key
Currently this calls the 32-bit Murmur finalizer on the
three elements, joined with hash_combine().
This is simpler, has better tested behavior, and probably shaves
a few cycles and some binary space.
Note: We may not need the full 32-bit finalizer reducing step.
It would be slightly cheaper to just use fasthash64 and
then take the lower 32 bits.
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..df310efee1 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const unsigned char *) key, size, 0);
}
/*
--
2.43.0
v7-0008-Casefold-lazily-in-guc_name_compare.patchtext/x-patch; charset=US-ASCII; name=v7-0008-Casefold-lazily-in-guc_name_compare.patchDownload
From 9be7bb1212e390b356e3310c3e29e6eda7f56e59 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:30:39 +0700
Subject: [PATCH v7 08/13] Casefold lazily in guc_name_compare
We can assume that almost all GUC names are lower case,
so compare each byte with strict equality first. If that
fails retry the comparison with down-casing.
TODO: This concept only been tested inlined into simplehash,
so need to see if this makes any difference on its own.
---
src/backend/utils/misc/guc.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 053be81d14..1484e11a42 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1305,12 +1305,16 @@ guc_name_compare(const char *namea, const char *nameb)
char cha = *namea++;
char chb = *nameb++;
- if (cha >= 'A' && cha <= 'Z')
- cha += 'a' - 'A';
- if (chb >= 'A' && chb <= 'Z')
- chb += 'a' - 'A';
if (cha != chb)
- return cha - chb;
+ {
+ /* Casefold lazily since we expect lower case */
+ if (cha >= 'A' && cha <= 'Z')
+ cha += 'a' - 'A';
+ if (chb >= 'A' && chb <= 'Z')
+ chb += 'a' - 'A';
+ if (cha != chb)
+ return cha - chb;
+ }
}
if (*namea)
return 1; /* a is longer */
--
2.43.0
v7-0007-Use-bytewise-fasthash-in-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v7-0007-Use-bytewise-fasthash-in-guc_name_hash.patchDownload
From d3a9fdf6cbc1702e40952136b84be08c0089b69d Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 17:19:10 +0700
Subject: [PATCH v7 07/13] Use bytewise fasthash in guc_name_hash
The previous hash function did not have a final bit mixing
step, so only depended on a few characters of the input.
The intermediate mixing steps could result in collisions
even with a finalizer. Also, it's not necessary to branch
depending on the case of the input -- we can borrow a trick
from the keyword hashes and unconditionally bitwise-OR with
0x20. That will do the correct case folding for letters
while leaving most other characters legal in GUC names
unchanged.
---
src/backend/utils/misc/guc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e76c083003..053be81d14 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn_unstable.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1324,22 +1325,21 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const void *key, Size keysize)
{
- uint32 result = 0;
const char *name = *(const char *const *) key;
+ fasthash_state hs;
+
+ fasthash_init(&hs, 0, 0);
while (*name)
{
char ch = *name++;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ /* quick and dirty casefolding suitable for hashing */
+ ch |= 0x20;
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
+ fasthash_accum_byte(&hs, (unsigned char) ch);
}
- return result;
+ return fasthash_final32(&hs);
}
/*
--
2.43.0
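To spell out the 0x20 trick from the commit message above: letters fold to
lower case, while digits and '.' already have that bit set and pass through
unchanged. The one common GUC character that does change is '_' (0x5F),
which maps to 0x7F, but it does so consistently, and 0x7F shouldn't appear
in a valid GUC name, so hashing stays compatible with guc_name_compare().
A tiny standalone check (illustrative only, not part of the patch):

#include <assert.h>

int
main(void)
{
	/* letters fold to lower case */
	assert(('A' | 0x20) == 'a');
	assert(('z' | 0x20) == 'z');

	/* digits and '.' already have the 0x20 bit set: unchanged */
	assert(('9' | 0x20) == '9');
	assert(('.' | 0x20) == '.');

	/* '_' (0x5F) maps to 0x7F: changed, but consistently so */
	assert(('_' | 0x20) == 0x7f);

	return 0;
}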
v7-0004-Assert-that-the-incremental-fasthash-variants-giv.patchtext/x-patch; charset=US-ASCII; name=v7-0004-Assert-that-the-incremental-fasthash-variants-giv.patchDownload
From d5dfa560eac6ff66b580580383fbcb636b2e15bc Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:22:35 +0700
Subject: [PATCH v7 04/13] Assert that the incremental fasthash variants give
the same answer as the original
XXX: Remove this and also the *_orig functions before commit
---
src/common/hashfn.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 2490607eea..37c8d307c7 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -26,6 +26,7 @@
#include "common/hashfn.h"
#include "port/pg_bitutils.h"
+#include "common/hashfn_unstable.h"
/*
* This hash function was written by Bob Jenkins
@@ -150,6 +151,9 @@ hash_bytes(const unsigned char *k, int keylen)
c,
len;
+ // XXX not for commit
+ Assert(fasthash64_orig((void *) k, keylen, 0) == fasthash64(k, keylen, 0));
+
/* Set up the internal state */
len = keylen;
a = b = c = 0x9e3779b9 + len + 3923095;
--
2.43.0
v7-0003-Add-UINT64CONST-not-sure-when-we-actually-need-th.patchtext/x-patch; charset=US-ASCII; name=v7-0003-Add-UINT64CONST-not-sure-when-we-actually-need-th.patchDownload
From 7416a25418fbc576fc549b9a31cc44d219501c92 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:14:04 +0700
Subject: [PATCH v7 03/13] Add UINT64CONST (not sure when we actually need
that)
fasthash*_orig left alone for now.
---
src/include/common/hashfn_unstable.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index fbae7a5522..7ed1e5335a 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -56,7 +56,7 @@ static inline uint64
fasthash_mix(uint64 h)
{
h ^= h >> 23;
- h *= 0x2127599bf4325c37ULL;
+ h *= UINT64CONST(0x2127599bf4325c37);
h ^= h >> 47;
return h;
}
@@ -65,7 +65,7 @@ static inline void
fasthash_combine(fasthash_state* hs)
{
hs->hash ^= fasthash_mix(hs->accum);
- hs->hash *= 0x880355f21e6d1965ULL;
+ hs->hash *= UINT64CONST(0x880355f21e6d1965);
/* reset hash state for next input */
hs->accum = 0;
@@ -79,7 +79,7 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
// since we don't know the length for a nul-terminated string
// handle some other way -- maybe we can accum the length in
// the state and fold it in during the finalizer (cf. xxHash3)
- hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+ hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965));
}
static inline void
--
2.43.0
v7-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchtext/x-patch; charset=US-ASCII; name=v7-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchDownload
From 14af760608974246c9c4985e1ec333d2ac4b8820 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 12:11:37 +0700
Subject: [PATCH v7 02/13] Rewrite fasthash functions using a homegrown
incremental interface
The incremental interface will be useful for cases we don't know
the length up front, such as NUL-terminated strings. First, we
need to validate that this interface can give the same answer
as the original functions when we do know the length. A future
commit will add a temporary assert for testing in CI.
---
src/include/common/hashfn_unstable.h | 161 +++++++++++++++++++++++++--
1 file changed, 153 insertions(+), 8 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a5bf965fa2..fbae7a5522 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -1,3 +1,25 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must have
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.c
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
/* The MIT License
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
@@ -23,16 +45,130 @@
SOFTWARE.
*/
-#include "fasthash.h"
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 hash;
+} fasthash_state;
+
+static inline uint64
+fasthash_mix(uint64 h)
+{
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state* hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum);
+ hs->hash *= 0x880355f21e6d1965ULL;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+
+ // since we don't know the length for a nul-terminated string
+ // handle some other way -- maybe we can accum the length in
+ // the state and fold it in during the finalizer (cf. xxHash3)
+ hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+}
+
+static inline void
+fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8: memcpy(&hs->accum, k, 8);
+ break;
+ case 7: hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6: hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5: hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4: hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3: hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2: hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1: hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+
+static inline uint64
+fasthash_final64(fasthash_state *hs)
+{
+ return fasthash_mix(hs->hash);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64 h = fasthash_final64(hs);
+ return h - (h >> 32);
+}
+
+static inline uint64
+fasthash64(const unsigned char * k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs);
+}
+
+static inline uint32
+fasthash32(const unsigned char * k, int len, uint64 seed)
+{
+ uint64 h = fasthash64(k, len, seed);
+ return h - (h >> 32);
+}
+
+
+// XXX NOT FOR COMMIT
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
-#define mix(h) ({ \
- (h) ^= (h) >> 23; \
- (h) *= 0x2127599bf4325c37ULL; \
- (h) ^= (h) >> 47; })
+static inline uint64_t mix(uint64_t h) {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
-uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+static inline
+uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
{
const uint64_t m = 0x880355f21e6d1965ULL;
const uint64_t *pos = (const uint64_t *)buf;
@@ -52,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
switch (len & 7) {
case 7: v ^= (uint64_t)pos2[6] << 48;
+ /* FALLTHROUGH */
case 6: v ^= (uint64_t)pos2[5] << 40;
+ /* FALLTHROUGH */
case 5: v ^= (uint64_t)pos2[4] << 32;
+ /* FALLTHROUGH */
case 4: v ^= (uint64_t)pos2[3] << 24;
+ /* FALLTHROUGH */
case 3: v ^= (uint64_t)pos2[2] << 16;
+ /* FALLTHROUGH */
case 2: v ^= (uint64_t)pos2[1] << 8;
+ /* FALLTHROUGH */
case 1: v ^= (uint64_t)pos2[0];
h ^= mix(v);
h *= m;
@@ -65,11 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
return mix(h);
}
-uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+static inline
+uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64_t h = fasthash64(buf, len, seed);
+ uint64_t h = fasthash64_orig(buf, len, seed);
return h - (h >> 32);
}
+
+#endif /* HASHFN_UNSTABLE_H */
--
2.43.0
v7-0001-Vendor-fasthash.patchtext/x-patch; charset=US-ASCII; name=v7-0001-Vendor-fasthash.patchDownload
From b5d621087bc96c48fc6bb9f7d86fc000e689059a Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v7 01/13] Vendor fasthash
MIT licensed
---
src/include/common/hashfn_unstable.h | 75 ++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..a5bf965fa2
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,75 @@
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#include "fasthash.h"
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+#define mix(h) ({ \
+ (h) ^= (h) >> 23; \
+ (h) *= 0x2127599bf4325c37ULL; \
+ (h) ^= (h) >> 47; })
+
+uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t *pos = (const uint64_t *)buf;
+ const uint64_t *end = pos + (len / 8);
+ const unsigned char *pos2;
+ uint64_t h = seed ^ (len * m);
+ uint64_t v;
+
+ while (pos != end) {
+ v = *pos++;
+ h ^= mix(v);
+ h *= m;
+ }
+
+ pos2 = (const unsigned char*)pos;
+ v = 0;
+
+ switch (len & 7) {
+ case 7: v ^= (uint64_t)pos2[6] << 48;
+ case 6: v ^= (uint64_t)pos2[5] << 40;
+ case 5: v ^= (uint64_t)pos2[4] << 32;
+ case 4: v ^= (uint64_t)pos2[3] << 24;
+ case 3: v ^= (uint64_t)pos2[2] << 16;
+ case 2: v ^= (uint64_t)pos2[1] << 8;
+ case 1: v ^= (uint64_t)pos2[0];
+ h ^= mix(v);
+ h *= m;
+ }
+
+ return mix(h);
+}
+
+uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64_t h = fasthash64(buf, len, seed);
+ return h - (h >> 32);
+}
--
2.43.0
I wrote:
On Sun, Dec 10, 2023 at 2:18 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Sat, 2023-12-09 at 18:52 +0700, John Naylor wrote:
I tested using the new hash function APIs for my search path cache,
and there's a significant speedup for cases not benefiting from
a86c61c9ee. It's enough that we almost don't need a86c61c9ee.
So a definite +1 to the new APIs.

Interesting, thanks for testing! SearchPathCache is a better starting
point than dynahash for removing strlen calls anyway -- it's more
localized, uses simplehash, and we can test it with at-hand tests.
Since I had to fix a misalignment in the original to keep ubsan from
crashing CI anyway (v8-0005), I thought I'd take the experiment with
search path cache and put the temporary validation of the hash
function output in there (v8-0004). I had to finagle a bit to get the
bytewise interface to give the same answer as the original, but that's
okay: The bytewise interface is intended for when we don't know the
length up front (and therefore the internal seed can't be tweaked with
the length), but it's nice to make sure nothing's broken.
There is also a chunkwise version for search path cache. That might be
a little faster. Perf testing can be done as is, because I put the
validation in assert builds only.
I've left out the GUC stuff for now, just want to get CI green again.
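To illustrate the "finagling" mentioned above (a standalone demonstration,
not part of the patches): fasthash_accum_byte() packs earlier bytes toward
the high end of the accumulator, while the block interface memcpy()s eight
bytes at once, which on little-endian hardware puts the first byte at the
low end. The two accumulators therefore differ by exactly a byte swap,
which is why v8-0004 inserts pg_bswap64() in assert builds. The builtin
below stands in for pg_bswap64() to keep the demo self-contained:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int
main(void)
{
	const unsigned char k[8] = "abcdefgh";
	uint64_t	by_block;
	uint64_t	by_bytes = 0;

	/* block interface: native load, little-endian on most hardware */
	memcpy(&by_block, k, sizeof(by_block));

	/* bytewise interface: earlier bytes shift toward the high end */
	for (int i = 0; i < 8; i++)
		by_bytes = (by_bytes << 8) | k[i];

	/* on little-endian machines, these differ by a byte swap */
	printf("%d\n", by_block == __builtin_bswap64(by_bytes));
	return 0;
}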
Attachments:
v8-0004-Assert-that-the-fasthash-variants-give-or-can-giv.patchtext/x-patch; charset=US-ASCII; name=v8-0004-Assert-that-the-fasthash-variants-give-or-can-giv.patchDownload
From 90b5aa53284604b7a592f49a7e8424051e681da0 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 15:14:24 +0700
Subject: [PATCH v8 4/5] Assert that the fasthash variants give (or can give)
the same answer as the original
Test that incremental hashing gives the right answer for strings
Use the initial length only for the init step. Test that
we can ignore the length afterwards, and only use the
presence of the NUL terminator to stop iterating. Assert
that this results in the same hash.
Based on "Use new hash APIs for search path cache" by Jeff Davis,
rebased over v7.
---
src/backend/catalog/namespace.c | 56 ++++++++++++++++++++++++++--
src/include/common/hashfn_unstable.h | 18 ++++++++-
2 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..afbfaf5dd4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -42,6 +42,7 @@
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +248,60 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
+ const char *buf = key.searchPath;
+ fasthash_state hs;
+
+ // XXX not for commit
+#ifdef USE_ASSERT_CHECKING
int blen = strlen(key.searchPath);
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ uint64 h_orig = fasthash64_orig(buf, blen, key.roleid);
+
+ // check full function that calls incremental interface
+ Assert(fasthash64((const unsigned char *) buf, blen, key.roleid) == h_orig);
+
+ // Test that bytewise interface can give the same answer,
+ // if we have length up front. We would typically use it
+ // for cases where we don't know, but let's try to make
+ // it as similar as conveniently possible.
+ fasthash_init(&hs, blen, key.roleid);
+ while (*buf)
+ {
+ fasthash_accum_byte(&hs, *buf);
+ buf++;
+ }
+ Assert(fasthash_final64(&hs) == h_orig);
+ buf = key.searchPath; /* reset */
+
+ // Now compare chunked incremental interface
+ fasthash_init(&hs, blen, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
+ buf += chunk_len;
+ }
+ Assert(fasthash_final64(&hs) == h_orig);
+ buf = key.searchPath; /* reset */
+#endif
+
+ // TODO: Test if chunked accum has better performance
+
+ // WIP: maybe roleid should be mixed in normally
+ // WIP: For now fake the length to preserve the internal seed
+ fasthash_init(&hs, 1, key.roleid);
+ while (*buf)
+ {
+ fasthash_accum_byte(&hs, *buf);
+ buf++;
+ }
+
+ // WIP: consider returning lower 32 bits, rather than mixing the high bits with the lower
+ return fasthash_final32(&hs);
}
static inline bool
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 80aec98dc9..13d6d70910 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -13,6 +13,8 @@ the same hashes between versions.
#ifndef HASHFN_UNSTABLE_H
#define HASHFN_UNSTABLE_H
+#include "port/pg_bswap.h"
+
/*
* fasthash is a modification of code taken from
* https://code.google.com/archive/p/fast-hash/source/default/source
@@ -91,9 +93,14 @@ fasthash_accum_byte(fasthash_state *hs, const unsigned char ch)
hs->accum |= ch;
hs->accum_len++;
- // wip: is there a better way to get sizeof struct member?
- if (hs->accum_len == sizeof(((fasthash_state *) 0)->accum))
+ if (hs->accum_len == FH_SIZEOF_ACCUM)
+ {
+#ifdef USE_ASSERT_CHECKING
+ // XXX not for commit
+ hs->accum = pg_bswap64(hs->accum);
+#endif
fasthash_combine(hs);
+ }
}
static inline void
@@ -134,7 +141,14 @@ fasthash_final64(fasthash_state *hs)
// check for remaining bytes to combine into hash
// should only be used by the bytewise interface
if (hs->accum_len > 0)
+ {
+#ifdef USE_ASSERT_CHECKING
+ // XXX not for commit
+ hs->accum <<= ((FH_SIZEOF_ACCUM - hs->accum_len) * BITS_PER_BYTE);
+ hs->accum = pg_bswap64(hs->accum);
+#endif
fasthash_combine(hs);
+ }
return fasthash_mix(hs->hash);
}
--
2.43.0
v8-0005-Fix-alignment-issue-in-the-original-fastash.patchtext/x-patch; charset=US-ASCII; name=v8-0005-Fix-alignment-issue-in-the-original-fastash.patchDownload
From 2f452e621a07433a4f539614d7b7c09eef29b1e8 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 21:27:13 +0700
Subject: [PATCH v8 5/5] Fix alignment issue in the original fasthash
Found by UBSan in CI
---
src/include/common/hashfn_unstable.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 13d6d70910..a13d577965 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -211,9 +211,10 @@ uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
uint64_t v;
while (pos != end) {
- v = *pos++;
+ memcpy(&v, pos, 8);
h ^= mix(v);
h *= m;
+ pos++;
}
pos2 = (const unsigned char*)pos;
--
2.43.0
v8-0001-Vendor-fasthash.patchtext/x-patch; charset=US-ASCII; name=v8-0001-Vendor-fasthash.patchDownload
From 4258073d786d338da788a9290ddc29aefd1aad78 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v8 1/5] Vendor fasthash
MIT licensed
---
src/include/common/hashfn_unstable.h | 75 ++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..a5bf965fa2
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,75 @@
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#include "fasthash.h"
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+#define mix(h) ({ \
+ (h) ^= (h) >> 23; \
+ (h) *= 0x2127599bf4325c37ULL; \
+ (h) ^= (h) >> 47; })
+
+uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t *pos = (const uint64_t *)buf;
+ const uint64_t *end = pos + (len / 8);
+ const unsigned char *pos2;
+ uint64_t h = seed ^ (len * m);
+ uint64_t v;
+
+ while (pos != end) {
+ v = *pos++;
+ h ^= mix(v);
+ h *= m;
+ }
+
+ pos2 = (const unsigned char*)pos;
+ v = 0;
+
+ switch (len & 7) {
+ case 7: v ^= (uint64_t)pos2[6] << 48;
+ case 6: v ^= (uint64_t)pos2[5] << 40;
+ case 5: v ^= (uint64_t)pos2[4] << 32;
+ case 4: v ^= (uint64_t)pos2[3] << 24;
+ case 3: v ^= (uint64_t)pos2[2] << 16;
+ case 2: v ^= (uint64_t)pos2[1] << 8;
+ case 1: v ^= (uint64_t)pos2[0];
+ h ^= mix(v);
+ h *= m;
+ }
+
+ return mix(h);
+}
+
+uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64_t h = fasthash64(buf, len, seed);
+ return h - (h >> 32);
+}
--
2.43.0
v8-0003-Add-bytewise-interface.patchtext/x-patch; charset=US-ASCII; name=v8-0003-Add-bytewise-interface.patchDownload
From 54e6419a632b04d97cad847603035050ab48c84f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:32:05 +0700
Subject: [PATCH v8 3/5] Add bytewise interface
This is useful for hashing values with unknown length,
like NUL-terminated strings. It should be faster than calling
strlen() first and passing the length, which most hash
functions require.
Note: This method can't give the same answer as
regular fasthash, so it will need to be evaluated. It's possible
we need to mix in the length at the finalization step (at which
time we know the length), in order to safeguard against
collisions.
---
src/include/common/hashfn_unstable.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index fbae7a5522..80aec98dc9 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -49,6 +49,7 @@ typedef struct fasthash_state
{
uint64 accum;
#define FH_SIZEOF_ACCUM sizeof(uint64)
+ int8 accum_len;
uint64 hash;
} fasthash_state;
@@ -69,6 +70,7 @@ fasthash_combine(fasthash_state* hs)
/* reset hash state for next input */
hs->accum = 0;
+ hs->accum_len = 0;
}
static inline void
@@ -82,6 +84,18 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
}
+static inline void
+fasthash_accum_byte(fasthash_state *hs, const unsigned char ch)
+{
+ hs->accum <<= BITS_PER_BYTE;
+ hs->accum |= ch;
+ hs->accum_len++;
+
+ // wip: is there a better way to get sizeof struct member?
+ if (hs->accum_len == sizeof(((fasthash_state *) 0)->accum))
+ fasthash_combine(hs);
+}
+
static inline void
fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
{
@@ -117,6 +131,11 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
static inline uint64
fasthash_final64(fasthash_state *hs)
{
+ // check for remaining bytes to combine into hash
+ // should only be used by the bytewise interface
+ if (hs->accum_len > 0)
+ fasthash_combine(hs);
+
return fasthash_mix(hs->hash);
}
--
2.43.0
v8-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchtext/x-patch; charset=US-ASCII; name=v8-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patchDownload
From f5ab683d61724e9766d43e58c6f3177a30f708d0 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 12:11:37 +0700
Subject: [PATCH v8 2/5] Rewrite fasthash functions using a homegrown
incremental interface
The incremental interface will be useful for cases we don't know
the length up front, such as NUL-terminated strings. First, we
need to validate that this interface can give the same answer
as the original functions when we do know the length. A future
commit will add a temporary assert for testing in CI.
---
src/include/common/hashfn_unstable.h | 161 +++++++++++++++++++++++++--
1 file changed, 153 insertions(+), 8 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a5bf965fa2..fbae7a5522 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -1,3 +1,25 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must have
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.c
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
/* The MIT License
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
@@ -23,16 +45,130 @@
SOFTWARE.
*/
-#include "fasthash.h"
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 hash;
+} fasthash_state;
+
+static inline uint64
+fasthash_mix(uint64 h)
+{
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state* hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum);
+ hs->hash *= 0x880355f21e6d1965ULL;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+
+ // since we don't know the length for a nul-terminated string
+ // handle some other way -- maybe we can accum the length in
+ // the state and fold it in during the finalizer (cf. xxHash3)
+ hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+}
+
+static inline void
+fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8: memcpy(&hs->accum, k, 8);
+ break;
+ case 7: hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6: hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5: hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4: hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3: hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2: hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1: hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+
+static inline uint64
+fasthash_final64(fasthash_state *hs)
+{
+ return fasthash_mix(hs->hash);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64 h = fasthash_final64(hs);
+ return h - (h >> 32);
+}
+
+static inline uint64
+fasthash64(const unsigned char * k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs);
+}
+
+static inline uint32
+fasthash32(const unsigned char * k, int len, uint64 seed)
+{
+ uint64 h = fasthash64(k, len, seed);
+ return h - (h >> 32);
+}
+
+
+// XXX NOT FOR COMMIT
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
-#define mix(h) ({ \
- (h) ^= (h) >> 23; \
- (h) *= 0x2127599bf4325c37ULL; \
- (h) ^= (h) >> 47; })
+static inline uint64_t mix(uint64_t h) {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
-uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+static inline
+uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
{
const uint64_t m = 0x880355f21e6d1965ULL;
const uint64_t *pos = (const uint64_t *)buf;
@@ -52,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
switch (len & 7) {
case 7: v ^= (uint64_t)pos2[6] << 48;
+ /* FALLTHROUGH */
case 6: v ^= (uint64_t)pos2[5] << 40;
+ /* FALLTHROUGH */
case 5: v ^= (uint64_t)pos2[4] << 32;
+ /* FALLTHROUGH */
case 4: v ^= (uint64_t)pos2[3] << 24;
+ /* FALLTHROUGH */
case 3: v ^= (uint64_t)pos2[2] << 16;
+ /* FALLTHROUGH */
case 2: v ^= (uint64_t)pos2[1] << 8;
+ /* FALLTHROUGH */
case 1: v ^= (uint64_t)pos2[0];
h ^= mix(v);
h *= m;
@@ -65,11 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
return mix(h);
}
-uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+static inline
+uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64_t h = fasthash64(buf, len, seed);
+ uint64_t h = fasthash64_orig(buf, len, seed);
return h - (h >> 32);
}
+
+#endif /* HASHFN_UNSTABLE_H */
--
2.43.0
On Sun, Dec 10, 2023 at 2:18 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Sat, 2023-12-09 at 18:52 +0700, John Naylor wrote:
I tested using the new hash function APIs for my search path cache,
and there's a significant speedup for cases not benefiting from
a86c61c9ee. It's enough that we almost don't need a86c61c9ee.
So a definite +1 to the new APIs.

Do you have a new test?
Still using the same basic test here:
/messages/by-id/04c8592dbd694e4114a3ed87139a7a04e4363030.camel@j-davis.com
What I did is:
a. add your v5 patches
b. disable optimization in a86c61c9ee
c. add attached patch to use new hash APIs

I got a slowdown between (a) and (b), and then (c) closed the gap about
halfway. It started to get close to test noise at that point -- I could
get some better numbers out of it if it's helpful.
I tried my variant of the same test [1] (but only 20 seconds per run),
which uses pgbench to take the average of a few dozen runs, and
doesn't use table I/O (when doing that, it's best to pre-warm the
buffers to reduce noise).
pgbench -n -T 20 -f bench.sql -M prepared
(done three times and take the median, with turbo off)
* master at 457428d9e99b6b from Dec 4:
latency average = 571.413 ms
* v8 (bytewise hash):
latency average = 588.942 ms
This regression is a bit surprising, since there is no strlen call,
and it uses roleid as a seed without a round of mixing (not sure if we
should do that, but just trying to verify results).
* v8 with chunked interface:
latency average = 555.688 ms
This starts to improve things for me.
* v8 with chunked, and return lower 32 bits of full 64-bit hash:
latency average = 556.324 ms
This is within the noise level. There doesn't seem to be much downside
of using a couple cycles for fasthash's 32-bit reduction.
* revert back to master from Dec 4 and then cherry pick a86c61c9ee
(save last entry of SearchPathCache)
latency average = 545.747 ms
So chunked incremental hashing gets within ~2% of that, which is nice.
It seems we should use that when removing strlen, when convenient.
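For reference, a sketch of what that chunked incremental hashing of a
NUL-terminated string looks like, adapted from the validation code in
v8-0004: walk the string up to eight bytes at a time, scanning each chunk
for the terminator, so no up-front strlen() is needed. The zero init
length and seed handling here are simplifying assumptions; the WIP patch
fakes a length to preserve the internal seed:

static inline uint64
hash_string_chunked(const char *buf, uint64 seed)
{
	fasthash_state hs;

	/* length not known up front */
	fasthash_init(&hs, 0, seed);

	while (*buf)
	{
		int chunk_len = 0;

		while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
			chunk_len++;

		fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
		buf += chunk_len;
	}

	return fasthash_final64(&hs);
}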
Updated next steps:
* Investigate whether/how to incorporate final length into the
calculation when we don't have the length up front.
* Add some desperately needed explanatory comments.
* Use this in some existing cases where it makes sense.
* Get back to GUC hash and dynahash.
[1]: /messages/by-id/CANWCAZY7Cr-GdUhrCLoR4+JGLChTb0pQxq9ZPi1KTLs+_KDFqg@mail.gmail.com
I wrote:
* v8 with chunked interface:
latency average = 555.688 ms
This starts to improve things for me.
* v8 with chunked, and return lower 32 bits of full 64-bit hash:
latency average = 556.324 ms
This is within the noise level. There doesn't seem to be much downside
of using a couple cycles for fasthash's 32-bit reduction.
* revert back to master from Dec 4 and then cherry pick a86c61c9ee
(save last entry of SearchPathCache)
latency average = 545.747 ms
So chunked incremental hashing gets within ~2% of that, which is nice.
It seems we should use that when removing strlen, when convenient.
Updated next steps:
* Investigate whether/how to incorporate final length into the
calculation when we don't have the length up front.
* Add some desperately needed explanatory comments.
* Use this in some existing cases where it makes sense.
* Get back to GUC hash and dynahash.
For #1 here, I cloned SMHasher and was dismayed at the complete lack
of documentation, but after some poking around, found how to run the
tests, using the 32-bit hash to save time. It turns out that the input
length is important. I've attached two files of results -- "nolen"
means stop using the initial length to tweak the internal seed. As you
can see, there are 8 failures. "pluslen" means I then incorporated the
length within the finalizer. This *does* pass SMHasher, so that's
good. (Of course this way can't produce the same hash as when we know
the length up front, but that's not important.) The attached shows how
that would work, after further whacking around and testing with Jeff's
prototype for the search path cache hash table. I'll work on code
comments and get it polished.
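To make the "pluslen" idea concrete, here is a rough sketch of what
folding the length into the finalizer could look like. The extra
total_len parameter and the function name are invented for illustration,
and the real patch may well mix the length in differently; the rest uses
the fasthash_state helpers from the posted patches:

static inline uint64
fasthash_final64_pluslen(fasthash_state *hs, uint32 total_len)
{
	/* flush any pending bytes from the bytewise interface */
	if (hs->accum_len > 0)
		fasthash_combine(hs);

	/* fold the now-known length into the result (cf. xxHash3) */
	hs->hash ^= total_len;
	return fasthash_mix(hs->hash);
}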
Attachments:
fasthash32_nolen.txttext/plain; charset=US-ASCII; name=fasthash32_nolen.txtDownload
--- Testing fasthash32 "fast-hash 32bit" with NO LENGTH
[[[ Sanity Tests ]]]
Verification value 0x6A202089 ....... FAIL! (Expected 0xe9481afc)
Running sanity check 1 .......... PASS
Running AppendedZeroesTest .......... PASS
[[[ Keyset 'Sparse' Tests ]]]
Keyset 'Sparse' - 16-bit keys with up to 9 bits set - 50643 keys
Testing collisions ( 32-bit) - Expected 0.3, actual 0 (0.00x)
Testing collisions (high 19-25 bits) - Worst is 20 bits: 1227/1203 (1.02x)
Testing collisions (low 19-25 bits) - Worst is 24 bits: 81/76 (1.06x)
Testing distribution - Worst bias is the 13-bit window at bit 31 - 0.436%
Keyset 'Sparse' - 24-bit keys with up to 8 bits set - 1271626 keys
Testing collisions ( 32-bit) - Expected 188.2, actual 182 (0.97x)
Testing distribution - Worst bias is the 17-bit window at bit 11 - 0.089%
Keyset 'Sparse' - 32-bit keys with up to 7 bits set - 4514873 keys
Testing collisions ( 32-bit) - Expected 2372.2, actual 2286 (0.96x)
Testing distribution - Worst bias is the 19-bit window at bit 23 - 0.044%
Keyset 'Sparse' - 40-bit keys with up to 6 bits set - 4598479 keys
Testing collisions ( 32-bit) - Expected 2460.8, actual 2426 (0.99x) (-34)
Testing distribution - Worst bias is the 19-bit window at bit 25 - 0.041%
Keyset 'Sparse' - 48-bit keys with up to 6 bits set - 14196869 keys
Testing collisions ( 32-bit) - Expected 23437.8, actual 23601 (1.01x) (164)
Testing distribution - Worst bias is the 20-bit window at bit 31 - 0.020%
Keyset 'Sparse' - 56-bit keys with up to 5 bits set - 4216423 keys
Testing collisions ( 32-bit) - Expected 2069.0, actual 1958 (0.95x)
Testing distribution - Worst bias is the 19-bit window at bit 2 - 0.040%
Keyset 'Sparse' - 64-bit keys with up to 5 bits set - 8303633 keys
Testing collisions ( 32-bit) - Expected 8021.7, actual 7994 (1.00x) (-27)
Testing distribution - Worst bias is the 20-bit window at bit 30 - 0.040%
Keyset 'Sparse' - 72-bit keys with up to 5 bits set - 15082603 keys
Testing collisions ( 32-bit) - Expected 26451.8, actual 26503 (1.00x) (52)
Testing distribution - Worst bias is the 20-bit window at bit 11 - 0.021%
Keyset 'Sparse' - 96-bit keys with up to 4 bits set - 3469497 keys
Testing collisions ( 32-bit) - Expected 1401.0, actual 1415 (1.01x) (15)
Testing distribution - Worst bias is the 19-bit window at bit 12 - 0.058%
Keyset 'Sparse' - 160-bit keys with up to 4 bits set - 26977161 keys
Testing collisions ( 32-bit) - Expected 84546.1, actual 84165 (1.00x) (-381)
Testing distribution - Worst bias is the 20-bit window at bit 12 - 0.012%
Keyset 'Sparse' - 256-bit keys with up to 3 bits set - 2796417 keys
Testing collisions ( 32-bit) - Expected 910.2, actual 905 (0.99x) (-5)
Testing distribution - Worst bias is the 18-bit window at bit 21 - 0.056%
Keyset 'Sparse' - 512-bit keys with up to 3 bits set - 22370049 keys
Testing collisions ( 32-bit) - Expected 58155.4, actual 57846 (0.99x) (-309)
Testing distribution - Worst bias is the 20-bit window at bit 6 - 0.013%
Keyset 'Sparse' - 1024-bit keys with up to 2 bits set - 524801 keys
Testing collisions ( 32-bit) - Expected 32.1, actual 35 (1.09x) (3)
Testing distribution - Worst bias is the 16-bit window at bit 13 - 0.118%
Keyset 'Sparse' - 2048-bit keys with up to 2 bits set - 2098177 keys
Testing collisions ( 32-bit) - Expected 512.4, actual 513 (1.00x) (1)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.086%
[[[ Keyset 'Permutation' Tests ]]]
Combination Lowbits Tests:
Keyset 'Combination' - up to 7 blocks from a set of 8 - 2396744 keys
Testing collisions ( 32-bit) - Expected 668.6, actual 33953 (50.78x) (33285) !!!!!
*********FAIL*********
Combination Highbits Tests
Keyset 'Combination' - up to 7 blocks from a set of 8 - 2396744 keys
Testing collisions ( 32-bit) - Expected 668.6, actual 33990 (50.84x) (33322) !!!!!
*********FAIL*********
Combination Hi-Lo Tests:
Keyset 'Combination' - up to 6 blocks from a set of 15 - 12204240 keys
Testing collisions ( 32-bit) - Expected 17322.9, actual 777895 (44.91x) (760573) !!!!!
*********FAIL*********
Combination 0x8000000 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 2799870 (342.00x) (2791684) !!!!!
*********FAIL*********
Combination 0x0000001 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 2799806 (342.00x) (2791620) !!!!!
*********FAIL*********
Combination 0x800000000000000 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8241 (1.01x) (55)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.031%
Combination 0x000000000000001 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8150 (1.00x) (-36)
Testing distribution - Worst bias is the 20-bit window at bit 22 - 0.056%
Combination 16-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8016 (0.98x)
Testing distribution - Worst bias is the 20-bit window at bit 19 - 0.033%
Combination 16-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8250 (1.01x) (64)
Testing distribution - Worst bias is the 20-bit window at bit 17 - 0.029%
Combination 32-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8066 (0.99x) (-120)
Testing distribution - Worst bias is the 19-bit window at bit 12 - 0.022%
Combination 32-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8265 (1.01x) (79)
Testing distribution - Worst bias is the 20-bit window at bit 10 - 0.044%
Combination 64-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8051 (0.98x) (-135)
Testing distribution - Worst bias is the 20-bit window at bit 30 - 0.031%
Combination 64-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8122 (0.99x) (-64)
Testing distribution - Worst bias is the 20-bit window at bit 4 - 0.032%
Combination 128-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8107 (0.99x) (-79)
Testing distribution - Worst bias is the 20-bit window at bit 9 - 0.038%
Combination 128-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8050 (0.98x) (-136)
Testing distribution - Worst bias is the 20-bit window at bit 11 - 0.026%
[[[ Keyset 'Cyclic' Tests ]]]
Keyset 'Cyclic' - 8 cycles of 4 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 125 (1.07x) (9)
Testing distribution - Worst bias is the 17-bit window at bit 2 - 0.141%
Keyset 'Cyclic' - 8 cycles of 5 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 108 (0.93x)
Testing distribution - Worst bias is the 17-bit window at bit 19 - 0.046%
Keyset 'Cyclic' - 8 cycles of 6 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 121 (1.04x) (5)
Testing distribution - Worst bias is the 17-bit window at bit 2 - 0.116%
Keyset 'Cyclic' - 8 cycles of 7 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 104 (0.89x)
Testing distribution - Worst bias is the 15-bit window at bit 26 - 0.068%
Keyset 'Cyclic' - 8 cycles of 8 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 118 (1.01x) (2)
Testing distribution - Worst bias is the 17-bit window at bit 9 - 0.140%
Keyset 'Cyclic' - 8 cycles of 12 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 98 (0.84x)
Testing distribution - Worst bias is the 17-bit window at bit 26 - 0.093%
[[[ Keyset 'TwoBytes' Tests ]]]
Keyset 'TwoBytes' - up-to-4-byte keys, 652545 total keys
Testing collisions ( 32-bit) - Expected 49.6, actual 261390 (5273.28x) (261341) !!!!!
Keyset 'TwoBytes' - up-to-8-byte keys, 5471025 total keys
Testing collisions ( 32-bit) - Expected 3483.1, actual 3648670 (1047.54x) (3645187) !!!!!
Keyset 'TwoBytes' - up-to-12-byte keys, 18616785 total keys
Testing collisions ( 32-bit) - Expected 40289.5, actual 12503722 (310.35x) (12463433) !!!!!
Keyset 'TwoBytes' - up-to-16-byte keys, 44251425 total keys
Testing collisions ( 32-bit) - Expected 227182.3, actual 34632335 (152.44x) (34405153) !!!!!
Keyset 'TwoBytes' - up-to-20-byte keys, 86536545 total keys
Testing collisions ( 32-bit) - Expected 865959.1, actual 64602745 (74.60x) (63736786) !!!!!
Keyset 'TwoBytes' - up-to-24-byte keys, 149633745 total keys
Testing collisions ( 32-bit) - Expected 2576560.5, actual 122139184 (47.40x) (119562624) !!!!!
*********FAIL*********
[[[ Keyset 'Text' Tests ]]]
Keyset 'Text' - keys of form "FooXXXXBar" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25539 (1.01x) (150)
Testing distribution - Worst bias is the 20-bit window at bit 3 - 0.024%
Keyset 'Text' - keys of form "FooBarXXXX" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25349 (1.00x) (-40)
Testing distribution - Worst bias is the 20-bit window at bit 30 - 0.033%
Keyset 'Text' - keys of form "XXXXFooBar" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25368 (1.00x) (-21)
Testing distribution - Worst bias is the 20-bit window at bit 14 - 0.018%
Keyset 'Words' - 4000000 random keys of len 6-16 from alnum charset
Testing collisions ( 32-bit) - Expected 1862.1, actual 1859 (1.00x) (-3)
Testing distribution - Worst bias is the 19-bit window at bit 30 - 0.065%
Keyset 'Words' - 4000000 random keys of len 6-16 from password charset
Testing collisions ( 32-bit) - Expected 1862.1, actual 1818 (0.98x)
Testing distribution - Worst bias is the 18-bit window at bit 14 - 0.042%
Keyset 'Words' - 479826 dict words
Testing collisions ( 32-bit) - Expected 26.8, actual 34 (1.27x) (8)
Testing distribution - Worst bias is the 16-bit window at bit 11 - 0.144%
[[[ Keyset 'Zeroes' Tests ]]]
Keyset 'Zeroes' - 204800 keys
Testing collisions ( 32-bit) - Expected 4.9, actual 179199 (36700.72x) (179195) !!!!!!
*********FAIL*********
[[[ Keyset 'Seed' Tests ]]]
Keyset 'Seed' - 5000000 keys
Testing collisions ( 32-bit) - Expected 2909.3, actual 2886 (0.99x) (-23)
Testing distribution - Worst bias is the 19-bit window at bit 0 - 0.044%
[[[ Keyset 'PerlinNoise' Tests ]]]
Testing 16777216 coordinates (L2) :
Testing collisions ( 32-bit) - Expected 32725.4, actual 32812 (1.00x) (87)
Testing AV variant, 128 count with 4 spacing, 4-12:
Testing collisions ( 32-bit) - Expected 1116.2, actual 2064632 (1849.78x) (2063516)
[[[ DiffDist 'Differential Distribution' Tests ]]]
Testing bit 0
Testing collisions ( 32-bit) - Expected 511.9, actual 516 (1.01x) (5)
Testing distribution - Worst bias is the 18-bit window at bit 28 - 0.060%
Testing bit 1
Testing collisions ( 32-bit) - Expected 511.9, actual 543 (1.06x) (32)
Testing distribution - Worst bias is the 18-bit window at bit 19 - 0.075%
Testing bit 2
Testing collisions ( 32-bit) - Expected 511.9, actual 544 (1.06x) (33)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.053%
Testing bit 3
Testing collisions ( 32-bit) - Expected 511.9, actual 487 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.064%
Testing bit 4
Testing collisions ( 32-bit) - Expected 511.9, actual 517 (1.01x) (6)
Testing distribution - Worst bias is the 17-bit window at bit 6 - 0.057%
Testing bit 5
Testing collisions ( 32-bit) - Expected 511.9, actual 551 (1.08x) (40)
Testing distribution - Worst bias is the 18-bit window at bit 21 - 0.058%
Testing bit 6
Testing collisions ( 32-bit) - Expected 511.9, actual 531 (1.04x) (20)
Testing distribution - Worst bias is the 18-bit window at bit 30 - 0.053%
Testing bit 7
Testing collisions ( 32-bit) - Expected 511.9, actual 532 (1.04x) (21)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.055%
Testing bit 8
Testing collisions ( 32-bit) - Expected 511.9, actual 530 (1.04x) (19)
Testing distribution - Worst bias is the 18-bit window at bit 23 - 0.080%
Testing bit 9
Testing collisions ( 32-bit) - Expected 511.9, actual 511 (1.00x)
Testing distribution - Worst bias is the 18-bit window at bit 5 - 0.065%
Testing bit 10
Testing collisions ( 32-bit) - Expected 511.9, actual 524 (1.02x) (13)
Testing distribution - Worst bias is the 18-bit window at bit 6 - 0.061%
Testing bit 11
Testing collisions ( 32-bit) - Expected 511.9, actual 498 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.071%
Testing bit 12
Testing collisions ( 32-bit) - Expected 511.9, actual 504 (0.98x) (-7)
Testing distribution - Worst bias is the 18-bit window at bit 19 - 0.110%
Testing bit 13
Testing collisions ( 32-bit) - Expected 511.9, actual 513 (1.00x) (2)
Testing distribution - Worst bias is the 17-bit window at bit 7 - 0.032%
Testing bit 14
Testing collisions ( 32-bit) - Expected 511.9, actual 526 (1.03x) (15)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.050%
Testing bit 15
Testing collisions ( 32-bit) - Expected 511.9, actual 502 (0.98x) (-9)
Testing distribution - Worst bias is the 18-bit window at bit 23 - 0.053%
Testing bit 16
Testing collisions ( 32-bit) - Expected 511.9, actual 493 (0.96x)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.071%
Testing bit 17
Testing collisions ( 32-bit) - Expected 511.9, actual 506 (0.99x) (-5)
Testing distribution - Worst bias is the 18-bit window at bit 5 - 0.066%
Testing bit 18
Testing collisions ( 32-bit) - Expected 511.9, actual 504 (0.98x) (-7)
Testing distribution - Worst bias is the 18-bit window at bit 0 - 0.090%
Testing bit 19
Testing collisions ( 32-bit) - Expected 511.9, actual 499 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.076%
Testing bit 20
Testing collisions ( 32-bit) - Expected 511.9, actual 534 (1.04x) (23)
Testing distribution - Worst bias is the 18-bit window at bit 5 - 0.109%
Testing bit 21
Testing collisions ( 32-bit) - Expected 511.9, actual 508 (0.99x) (-3)
Testing distribution - Worst bias is the 18-bit window at bit 8 - 0.065%
Testing bit 22
Testing collisions ( 32-bit) - Expected 511.9, actual 506 (0.99x) (-5)
Testing distribution - Worst bias is the 16-bit window at bit 4 - 0.040%
Testing bit 23
Testing collisions ( 32-bit) - Expected 511.9, actual 516 (1.01x) (5)
Testing distribution - Worst bias is the 18-bit window at bit 22 - 0.096%
Testing bit 24
Testing collisions ( 32-bit) - Expected 511.9, actual 496 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.066%
Testing bit 25
Testing collisions ( 32-bit) - Expected 511.9, actual 514 (1.00x) (3)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.082%
Testing bit 26
Testing collisions ( 32-bit) - Expected 511.9, actual 527 (1.03x) (16)
Testing distribution - Worst bias is the 18-bit window at bit 15 - 0.067%
Testing bit 27
Testing collisions ( 32-bit) - Expected 511.9, actual 500 (0.98x)
Testing distribution - Worst bias is the 18-bit window at bit 3 - 0.042%
Testing bit 28
Testing collisions ( 32-bit) - Expected 511.9, actual 565 (1.10x) (54)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.069%
Testing bit 29
Testing collisions ( 32-bit) - Expected 511.9, actual 481 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.061%
Testing bit 30
Testing collisions ( 32-bit) - Expected 511.9, actual 518 (1.01x) (7)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.063%
Testing bit 31
Testing collisions ( 32-bit) - Expected 511.9, actual 511 (1.00x)
Testing distribution - Worst bias is the 18-bit window at bit 22 - 0.084%
Testing bit 32
Testing collisions ( 32-bit) - Expected 511.9, actual 519 (1.01x) (8)
Testing distribution - Worst bias is the 18-bit window at bit 30 - 0.058%
Testing bit 33
Testing collisions ( 32-bit) - Expected 511.9, actual 532 (1.04x) (21)
Testing distribution - Worst bias is the 18-bit window at bit 30 - 0.058%
Testing bit 34
Testing collisions ( 32-bit) - Expected 511.9, actual 502 (0.98x) (-9)
Testing distribution - Worst bias is the 18-bit window at bit 0 - 0.077%
Testing bit 35
Testing collisions ( 32-bit) - Expected 511.9, actual 479 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 29 - 0.063%
Testing bit 36
Testing collisions ( 32-bit) - Expected 511.9, actual 548 (1.07x) (37)
Testing distribution - Worst bias is the 18-bit window at bit 14 - 0.059%
Testing bit 37
Testing collisions ( 32-bit) - Expected 511.9, actual 545 (1.06x) (34)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.100%
Testing bit 38
Testing collisions ( 32-bit) - Expected 511.9, actual 532 (1.04x) (21)
Testing distribution - Worst bias is the 18-bit window at bit 3 - 0.052%
Testing bit 39
Testing collisions ( 32-bit) - Expected 511.9, actual 500 (0.98x)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.090%
Testing bit 40
Testing collisions ( 32-bit) - Expected 511.9, actual 556 (1.09x) (45)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.074%
Testing bit 41
Testing collisions ( 32-bit) - Expected 511.9, actual 542 (1.06x) (31)
Testing distribution - Worst bias is the 18-bit window at bit 15 - 0.032%
Testing bit 42
Testing collisions ( 32-bit) - Expected 511.9, actual 520 (1.02x) (9)
Testing distribution - Worst bias is the 18-bit window at bit 25 - 0.059%
Testing bit 43
Testing collisions ( 32-bit) - Expected 511.9, actual 446 (0.87x)
Testing distribution - Worst bias is the 18-bit window at bit 15 - 0.063%
Testing bit 44
Testing collisions ( 32-bit) - Expected 511.9, actual 515 (1.01x) (4)
Testing distribution - Worst bias is the 18-bit window at bit 31 - 0.063%
Testing bit 45
Testing collisions ( 32-bit) - Expected 511.9, actual 487 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.065%
Testing bit 46
Testing collisions ( 32-bit) - Expected 511.9, actual 529 (1.03x) (18)
Testing distribution - Worst bias is the 18-bit window at bit 2 - 0.066%
Testing bit 47
Testing collisions ( 32-bit) - Expected 511.9, actual 525 (1.03x) (14)
Testing distribution - Worst bias is the 18-bit window at bit 0 - 0.084%
Testing bit 48
Testing collisions ( 32-bit) - Expected 511.9, actual 534 (1.04x) (23)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.071%
Testing bit 49
Testing collisions ( 32-bit) - Expected 511.9, actual 534 (1.04x) (23)
Testing distribution - Worst bias is the 18-bit window at bit 27 - 0.073%
Testing bit 50
Testing collisions ( 32-bit) - Expected 511.9, actual 550 (1.07x) (39)
Testing distribution - Worst bias is the 18-bit window at bit 23 - 0.080%
Testing bit 51
Testing collisions ( 32-bit) - Expected 511.9, actual 487 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 19 - 0.022%
Testing bit 52
Testing collisions ( 32-bit) - Expected 511.9, actual 526 (1.03x) (15)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.072%
Testing bit 53
Testing collisions ( 32-bit) - Expected 511.9, actual 491 (0.96x)
Testing distribution - Worst bias is the 18-bit window at bit 11 - 0.063%
Testing bit 54
Testing collisions ( 32-bit) - Expected 511.9, actual 516 (1.01x) (5)
Testing distribution - Worst bias is the 18-bit window at bit 12 - 0.053%
Testing bit 55
Testing collisions ( 32-bit) - Expected 511.9, actual 496 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 28 - 0.037%
Testing bit 56
Testing collisions ( 32-bit) - Expected 511.9, actual 488 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 27 - 0.054%
Testing bit 57
Testing collisions ( 32-bit) - Expected 511.9, actual 477 (0.93x)
Testing distribution - Worst bias is the 18-bit window at bit 24 - 0.062%
Testing bit 58
Testing collisions ( 32-bit) - Expected 511.9, actual 517 (1.01x) (6)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.067%
Testing bit 59
Testing collisions ( 32-bit) - Expected 511.9, actual 518 (1.01x) (7)
Testing distribution - Worst bias is the 18-bit window at bit 10 - 0.057%
Testing bit 60
Testing collisions ( 32-bit) - Expected 511.9, actual 467 (0.91x)
Testing distribution - Worst bias is the 18-bit window at bit 14 - 0.056%
Testing bit 61
Testing collisions ( 32-bit) - Expected 511.9, actual 507 (0.99x) (-4)
Testing distribution - Worst bias is the 18-bit window at bit 31 - 0.058%
Testing bit 62
Testing collisions ( 32-bit) - Expected 511.9, actual 495 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.078%
Testing bit 63
Testing collisions ( 32-bit) - Expected 511.9, actual 505 (0.99x) (-6)
Testing distribution - Worst bias is the 18-bit window at bit 8 - 0.046%
Input vcode 0x00000001, Output vcode 0x00000001, Result vcode 0x00000001
Verification value is 0x00000001 - Testing took 264.327292 seconds
[[[ Avalanche Tests ]]]
Testing 24-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.576000%
Testing 32-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.652667%
Testing 40-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.716000%
Testing 48-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.664667%
Testing 56-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.634667%
Testing 64-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.760667%
Testing 72-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.696667%
Testing 80-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.678000%
Testing 96-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.784667%
Testing 112-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.610000%
Testing 128-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.603333%
Testing 160-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.709333%
Testing 512-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.768667%
Testing 1024-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.751333%
[[[ Diff 'Differential' Tests ]]]
Testing 8303632 up-to-5-bit differentials in 64-bit keys -> 32 bit hashes.
1000 reps, 8303632000 total tests, expecting 1.93 random collisions..........
0 total collisions, of which 0 single collisions were ignored
Testing 11017632 up-to-4-bit differentials in 128-bit keys -> 32 bit hashes.
1000 reps, 11017632000 total tests, expecting 2.57 random collisions..........
0 total collisions, of which 0 single collisions were ignored
Testing 2796416 up-to-3-bit differentials in 256-bit keys -> 32 bit hashes.
1000 reps, 2796416000 total tests, expecting 0.65 random collisions..........
0 total collisions, of which 0 single collisions were ignored
[[[ MomentChi2 Tests ]]]
Analyze hashes produced from a series of linearly increasing 32-bit numbers, using a step of 2 ...
Target values to approximate : 1391290.000000 - 686.666667
4 threads starting... done
Popcount 1 stats : 1391274.272774 - 687.214286
Popcount 0 stats : 1391326.086340 - 687.394241
MomentChi2 for bits 1 : 0.180034
MomentChi2 for bits 0 : 0.947719
Derivative stats (transition from 2 consecutive values) :
Popcount 1 stats : 1391337.595605 - 687.344446
Popcount 0 stats : 1391285.423503 - 687.295030
MomentChi2 for deriv b1 : 1.64871
MomentChi2 for deriv b0 : 0.0152437
Great
[[[ Prng Tests ]]]
Skipping PRNG test; it is designed for hashes >= 64-bits
[[[ BIC 'Bit Independence Criteria' Tests ]]]
...........
Max bias 0.005424 - ( 1 : 13, 26)
Input vcode 0x00000001, Output vcode 0x00000001, Result vcode 0x00000001
Verification value is 0x00000001 - Testing took 720.371917 seconds
fasthash32_pluslen.txt (text/plain)
--- Testing fasthash32 "fast-hash 32bit" with length in finalizer
[[[ Sanity Tests ]]]
Verification value 0x6A202089 ....... FAIL! (Expected 0xe9481afc) [JCN - won't give the same answer obviously]
Running sanity check 1 .......... PASS
Running AppendedZeroesTest .......... PASS
[[[ Keyset 'Sparse' Tests ]]]
Keyset 'Sparse' - 16-bit keys with up to 9 bits set - 50643 keys
Testing collisions ( 32-bit) - Expected 0.3, actual 0 (0.00x)
Testing collisions (high 19-25 bits) - Worst is 25 bits: 47/38 (1.23x)
Testing collisions (low 19-25 bits) - Worst is 24 bits: 80/76 (1.05x)
Testing distribution - Worst bias is the 13-bit window at bit 2 - 0.495%
Keyset 'Sparse' - 24-bit keys with up to 8 bits set - 1271626 keys
Testing collisions ( 32-bit) - Expected 188.2, actual 183 (0.97x)
Testing distribution - Worst bias is the 17-bit window at bit 30 - 0.081%
Keyset 'Sparse' - 32-bit keys with up to 7 bits set - 4514873 keys
Testing collisions ( 32-bit) - Expected 2372.2, actual 2427 (1.02x) (55)
Testing distribution - Worst bias is the 19-bit window at bit 28 - 0.047%
Keyset 'Sparse' - 40-bit keys with up to 6 bits set - 4598479 keys
Testing collisions ( 32-bit) - Expected 2460.8, actual 2569 (1.04x) (109)
Testing distribution - Worst bias is the 19-bit window at bit 4 - 0.057%
Keyset 'Sparse' - 48-bit keys with up to 6 bits set - 14196869 keys
Testing collisions ( 32-bit) - Expected 23437.8, actual 23175 (0.99x) (-262)
Testing distribution - Worst bias is the 20-bit window at bit 30 - 0.022%
Keyset 'Sparse' - 56-bit keys with up to 5 bits set - 4216423 keys
Testing collisions ( 32-bit) - Expected 2069.0, actual 2093 (1.01x) (25)
Testing distribution - Worst bias is the 19-bit window at bit 11 - 0.049%
Keyset 'Sparse' - 64-bit keys with up to 5 bits set - 8303633 keys
Testing collisions ( 32-bit) - Expected 8021.7, actual 8008 (1.00x) (-13)
Testing distribution - Worst bias is the 20-bit window at bit 31 - 0.033%
Keyset 'Sparse' - 72-bit keys with up to 5 bits set - 15082603 keys
Testing collisions ( 32-bit) - Expected 26451.8, actual 26440 (1.00x) (-11)
Testing distribution - Worst bias is the 20-bit window at bit 1 - 0.018%
Keyset 'Sparse' - 96-bit keys with up to 4 bits set - 3469497 keys
Testing collisions ( 32-bit) - Expected 1401.0, actual 1408 (1.01x) (8)
Testing distribution - Worst bias is the 19-bit window at bit 26 - 0.055%
Keyset 'Sparse' - 160-bit keys with up to 4 bits set - 26977161 keys
Testing collisions ( 32-bit) - Expected 84546.1, actual 84196 (1.00x) (-350)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.015%
Keyset 'Sparse' - 256-bit keys with up to 3 bits set - 2796417 keys
Testing collisions ( 32-bit) - Expected 910.2, actual 890 (0.98x)
Testing distribution - Worst bias is the 19-bit window at bit 27 - 0.044%
Keyset 'Sparse' - 512-bit keys with up to 3 bits set - 22370049 keys
Testing collisions ( 32-bit) - Expected 58155.4, actual 57845 (0.99x) (-310)
Testing distribution - Worst bias is the 20-bit window at bit 1 - 0.007%
Keyset 'Sparse' - 1024-bit keys with up to 2 bits set - 524801 keys
Testing collisions ( 32-bit) - Expected 32.1, actual 37 (1.15x) (5)
Testing distribution - Worst bias is the 16-bit window at bit 21 - 0.125%
Keyset 'Sparse' - 2048-bit keys with up to 2 bits set - 2098177 keys
Testing collisions ( 32-bit) - Expected 512.4, actual 547 (1.07x) (35)
Testing distribution - Worst bias is the 18-bit window at bit 14 - 0.063%
[[[ Keyset 'Permutation' Tests ]]]
Combination Lowbits Tests:
Keyset 'Combination' - up to 7 blocks from a set of 8 - 2396744 keys
Testing collisions ( 32-bit) - Expected 668.6, actual 618 (0.92x)
Testing distribution - Worst bias is the 18-bit window at bit 12 - 0.050%
Combination Highbits Tests
Keyset 'Combination' - up to 7 blocks from a set of 8 - 2396744 keys
Testing collisions ( 32-bit) - Expected 668.6, actual 691 (1.03x) (23)
Testing distribution - Worst bias is the 18-bit window at bit 15 - 0.071%
Combination Hi-Lo Tests:
Keyset 'Combination' - up to 6 blocks from a set of 15 - 12204240 keys
Testing collisions ( 32-bit) - Expected 17322.9, actual 17300 (1.00x) (-22)
Testing distribution - Worst bias is the 20-bit window at bit 12 - 0.034%
Combination 0x8000000 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8259 (1.01x) (73)
Testing distribution - Worst bias is the 20-bit window at bit 13 - 0.040%
Combination 0x0000001 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8168 (1.00x) (-18)
Testing distribution - Worst bias is the 20-bit window at bit 15 - 0.022%
Combination 0x800000000000000 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8173 (1.00x) (-13)
Testing distribution - Worst bias is the 20-bit window at bit 31 - 0.027%
Combination 0x000000000000001 Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8087 (0.99x) (-99)
Testing distribution - Worst bias is the 20-bit window at bit 5 - 0.015%
Combination 16-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8066 (0.99x) (-120)
Testing distribution - Worst bias is the 20-bit window at bit 7 - 0.028%
Combination 16-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8314 (1.02x) (128)
Testing distribution - Worst bias is the 20-bit window at bit 21 - 0.030%
Combination 32-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8304 (1.01x) (118)
Testing distribution - Worst bias is the 20-bit window at bit 0 - 0.016%
Combination 32-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8253 (1.01x) (67)
Testing distribution - Worst bias is the 20-bit window at bit 12 - 0.034%
Combination 64-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8108 (0.99x) (-78)
Testing distribution - Worst bias is the 20-bit window at bit 26 - 0.035%
Combination 64-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8216 (1.00x) (30)
Testing distribution - Worst bias is the 20-bit window at bit 11 - 0.041%
Combination 128-bytes [0-1] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8073 (0.99x) (-113)
Testing distribution - Worst bias is the 20-bit window at bit 17 - 0.027%
Combination 128-bytes [0-last] Tests:
Keyset 'Combination' - up to 22 blocks from a set of 2 - 8388606 keys
Testing collisions ( 32-bit) - Expected 8186.7, actual 8048 (0.98x) (-138)
Testing distribution - Worst bias is the 19-bit window at bit 5 - 0.026%
[[[ Keyset 'Cyclic' Tests ]]]
Keyset 'Cyclic' - 8 cycles of 4 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 108 (0.93x)
Testing distribution - Worst bias is the 17-bit window at bit 16 - 0.124%
Keyset 'Cyclic' - 8 cycles of 5 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 126 (1.08x) (10)
Testing distribution - Worst bias is the 17-bit window at bit 19 - 0.092%
Keyset 'Cyclic' - 8 cycles of 6 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 111 (0.95x)
Testing distribution - Worst bias is the 17-bit window at bit 10 - 0.087%
Keyset 'Cyclic' - 8 cycles of 7 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 117 (1.01x) (1)
Testing distribution - Worst bias is the 17-bit window at bit 21 - 0.113%
Keyset 'Cyclic' - 8 cycles of 8 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 134 (1.15x) (18)
Testing distribution - Worst bias is the 16-bit window at bit 13 - 0.062%
Keyset 'Cyclic' - 8 cycles of 12 bytes - 1000000 keys
Testing collisions ( 32-bit) - Expected 116.4, actual 127 (1.09x) (11)
Testing distribution - Worst bias is the 17-bit window at bit 25 - 0.091%
[[[ Keyset 'TwoBytes' Tests ]]]
Keyset 'TwoBytes' - up-to-4-byte keys, 652545 total keys
Testing collisions ( 32-bit) - Expected 49.6, actual 50 (1.01x) (1)
Testing distribution - Worst bias is the 16-bit window at bit 21 - 0.089%
Keyset 'TwoBytes' - up-to-8-byte keys, 5471025 total keys
Testing collisions ( 32-bit) - Expected 3483.1, actual 3457 (0.99x) (-26)
Testing distribution - Worst bias is the 20-bit window at bit 9 - 0.094%
Keyset 'TwoBytes' - up-to-12-byte keys, 18616785 total keys
Testing collisions ( 32-bit) - Expected 40289.5, actual 40410 (1.00x) (121)
Testing distribution - Worst bias is the 20-bit window at bit 29 - 0.017%
Keyset 'TwoBytes' - up-to-16-byte keys, 44251425 total keys
Testing collisions ( 32-bit) - Expected 227182.3, actual 227335 (1.00x) (153)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.008%
Keyset 'TwoBytes' - up-to-20-byte keys, 86536545 total keys
Testing collisions ( 32-bit) - Expected 865959.1, actual 865658 (1.00x) (-301)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.004%
Keyset 'TwoBytes' - up-to-24-byte keys, 149633745 total keys
Testing collisions ( 32-bit) - Expected 2576560.5, actual 2575915 (1.00x) (-645)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.002%
[[[ Keyset 'Text' Tests ]]]
Keyset 'Text' - keys of form "FooXXXXBar" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25549 (1.01x) (160)
Testing distribution - Worst bias is the 20-bit window at bit 15 - 0.024%
Keyset 'Text' - keys of form "FooBarXXXX" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25290 (1.00x) (-99)
Testing distribution - Worst bias is the 20-bit window at bit 23 - 0.014%
Keyset 'Text' - keys of form "XXXXFooBar" - 14776336 keys
Testing collisions ( 32-bit) - Expected 25389.0, actual 25389 (1.00x)
Testing distribution - Worst bias is the 20-bit window at bit 13 - 0.025%
Keyset 'Words' - 4000000 random keys of len 6-16 from alnum charset
Testing collisions ( 32-bit) - Expected 1862.1, actual 1905 (1.02x) (43)
Testing distribution - Worst bias is the 19-bit window at bit 26 - 0.060%
Keyset 'Words' - 4000000 random keys of len 6-16 from password charset
Testing collisions ( 32-bit) - Expected 1862.1, actual 1899 (1.02x) (37)
Testing distribution - Worst bias is the 19-bit window at bit 23 - 0.029%
Keyset 'Words' - 479826 dict words
Testing collisions ( 32-bit) - Expected 26.8, actual 19 (0.71x)
Testing distribution - Worst bias is the 15-bit window at bit 3 - 0.141%
[[[ Keyset 'Zeroes' Tests ]]]
Keyset 'Zeroes' - 204800 keys
Testing collisions ( 32-bit) - Expected 4.9, actual 5 (1.02x) (1)
Testing collisions (high 21-29 bits) - Worst is 26 bits: 329/312 (1.05x)
Testing collisions (low 21-29 bits) - Worst is 26 bits: 310/312 (0.99x)
Testing distribution - Worst bias is the 15-bit window at bit 26 - 0.177%
[[[ Keyset 'Seed' Tests ]]]
Keyset 'Seed' - 5000000 keys
Testing collisions ( 32-bit) - Expected 2909.3, actual 3028 (1.04x) (119)
Testing distribution - Worst bias is the 19-bit window at bit 17 - 0.050%
[[[ Keyset 'PerlinNoise' Tests ]]]
Testing 16777216 coordinates (L2) :
Testing collisions ( 32-bit) - Expected 32725.4, actual 32761 (1.00x) (36)
Testing AV variant, 128 count with 4 spacing, 4-12:
Testing collisions ( 32-bit) - Expected 1116.2, actual 1079 (0.97x)
[[[ DiffDist 'Differential Distribution' Tests ]]]
Testing bit 0
Testing collisions ( 32-bit) - Expected 511.9, actual 517 (1.01x) (6)
Testing distribution - Worst bias is the 18-bit window at bit 7 - 0.073%
Testing bit 1
Testing collisions ( 32-bit) - Expected 511.9, actual 497 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.064%
Testing bit 2
Testing collisions ( 32-bit) - Expected 511.9, actual 482 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.069%
Testing bit 3
Testing collisions ( 32-bit) - Expected 511.9, actual 484 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 11 - 0.055%
Testing bit 4
Testing collisions ( 32-bit) - Expected 511.9, actual 497 (0.97x)
Testing distribution - Worst bias is the 17-bit window at bit 2 - 0.052%
Testing bit 5
Testing collisions ( 32-bit) - Expected 511.9, actual 516 (1.01x) (5)
Testing distribution - Worst bias is the 17-bit window at bit 4 - 0.045%
Testing bit 6
Testing collisions ( 32-bit) - Expected 511.9, actual 496 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.096%
Testing bit 7
Testing collisions ( 32-bit) - Expected 511.9, actual 527 (1.03x) (16)
Testing distribution - Worst bias is the 18-bit window at bit 14 - 0.088%
Testing bit 8
Testing collisions ( 32-bit) - Expected 511.9, actual 514 (1.00x) (3)
Testing distribution - Worst bias is the 18-bit window at bit 12 - 0.046%
Testing bit 9
Testing collisions ( 32-bit) - Expected 511.9, actual 482 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 31 - 0.063%
Testing bit 10
Testing collisions ( 32-bit) - Expected 511.9, actual 495 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 25 - 0.043%
Testing bit 11
Testing collisions ( 32-bit) - Expected 511.9, actual 524 (1.02x) (13)
Testing distribution - Worst bias is the 18-bit window at bit 21 - 0.041%
Testing bit 12
Testing collisions ( 32-bit) - Expected 511.9, actual 505 (0.99x) (-6)
Testing distribution - Worst bias is the 17-bit window at bit 8 - 0.067%
Testing bit 13
Testing collisions ( 32-bit) - Expected 511.9, actual 554 (1.08x) (43)
Testing distribution - Worst bias is the 18-bit window at bit 12 - 0.037%
Testing bit 14
Testing collisions ( 32-bit) - Expected 511.9, actual 502 (0.98x) (-9)
Testing distribution - Worst bias is the 18-bit window at bit 19 - 0.104%
Testing bit 15
Testing collisions ( 32-bit) - Expected 511.9, actual 505 (0.99x) (-6)
Testing distribution - Worst bias is the 18-bit window at bit 24 - 0.065%
Testing bit 16
Testing collisions ( 32-bit) - Expected 511.9, actual 526 (1.03x) (15)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.088%
Testing bit 17
Testing collisions ( 32-bit) - Expected 511.9, actual 482 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 19 - 0.082%
Testing bit 18
Testing collisions ( 32-bit) - Expected 511.9, actual 556 (1.09x) (45)
Testing distribution - Worst bias is the 17-bit window at bit 1 - 0.067%
Testing bit 19
Testing collisions ( 32-bit) - Expected 511.9, actual 507 (0.99x) (-4)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.073%
Testing bit 20
Testing collisions ( 32-bit) - Expected 511.9, actual 506 (0.99x) (-5)
Testing distribution - Worst bias is the 18-bit window at bit 11 - 0.068%
Testing bit 21
Testing collisions ( 32-bit) - Expected 511.9, actual 513 (1.00x) (2)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.049%
Testing bit 22
Testing collisions ( 32-bit) - Expected 511.9, actual 518 (1.01x) (7)
Testing distribution - Worst bias is the 18-bit window at bit 26 - 0.057%
Testing bit 23
Testing collisions ( 32-bit) - Expected 511.9, actual 520 (1.02x) (9)
Testing distribution - Worst bias is the 18-bit window at bit 29 - 0.069%
Testing bit 24
Testing collisions ( 32-bit) - Expected 511.9, actual 488 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 18 - 0.086%
Testing bit 25
Testing collisions ( 32-bit) - Expected 511.9, actual 575 (1.12x) (64)
Testing distribution - Worst bias is the 18-bit window at bit 28 - 0.087%
Testing bit 26
Testing collisions ( 32-bit) - Expected 511.9, actual 525 (1.03x) (14)
Testing distribution - Worst bias is the 18-bit window at bit 24 - 0.070%
Testing bit 27
Testing collisions ( 32-bit) - Expected 511.9, actual 481 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.068%
Testing bit 28
Testing collisions ( 32-bit) - Expected 511.9, actual 496 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 17 - 0.084%
Testing bit 29
Testing collisions ( 32-bit) - Expected 511.9, actual 509 (0.99x) (-2)
Testing distribution - Worst bias is the 18-bit window at bit 30 - 0.073%
Testing bit 30
Testing collisions ( 32-bit) - Expected 511.9, actual 509 (0.99x) (-2)
Testing distribution - Worst bias is the 18-bit window at bit 28 - 0.051%
Testing bit 31
Testing collisions ( 32-bit) - Expected 511.9, actual 516 (1.01x) (5)
Testing distribution - Worst bias is the 17-bit window at bit 25 - 0.064%
Testing bit 32
Testing collisions ( 32-bit) - Expected 511.9, actual 499 (0.97x)
Testing distribution - Worst bias is the 17-bit window at bit 12 - 0.055%
Testing bit 33
Testing collisions ( 32-bit) - Expected 511.9, actual 500 (0.98x)
Testing distribution - Worst bias is the 18-bit window at bit 12 - 0.118%
Testing bit 34
Testing collisions ( 32-bit) - Expected 511.9, actual 506 (0.99x) (-5)
Testing distribution - Worst bias is the 18-bit window at bit 25 - 0.050%
Testing bit 35
Testing collisions ( 32-bit) - Expected 511.9, actual 489 (0.96x)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.084%
Testing bit 36
Testing collisions ( 32-bit) - Expected 511.9, actual 532 (1.04x) (21)
Testing distribution - Worst bias is the 18-bit window at bit 11 - 0.072%
Testing bit 37
Testing collisions ( 32-bit) - Expected 511.9, actual 532 (1.04x) (21)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.064%
Testing bit 38
Testing collisions ( 32-bit) - Expected 511.9, actual 506 (0.99x) (-5)
Testing distribution - Worst bias is the 18-bit window at bit 2 - 0.071%
Testing bit 39
Testing collisions ( 32-bit) - Expected 511.9, actual 530 (1.04x) (19)
Testing distribution - Worst bias is the 18-bit window at bit 23 - 0.053%
Testing bit 40
Testing collisions ( 32-bit) - Expected 511.9, actual 511 (1.00x)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.060%
Testing bit 41
Testing collisions ( 32-bit) - Expected 511.9, actual 539 (1.05x) (28)
Testing distribution - Worst bias is the 18-bit window at bit 29 - 0.083%
Testing bit 42
Testing collisions ( 32-bit) - Expected 511.9, actual 510 (1.00x) (-1)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.063%
Testing bit 43
Testing collisions ( 32-bit) - Expected 511.9, actual 519 (1.01x) (8)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.079%
Testing bit 44
Testing collisions ( 32-bit) - Expected 511.9, actual 523 (1.02x) (12)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.084%
Testing bit 45
Testing collisions ( 32-bit) - Expected 511.9, actual 542 (1.06x) (31)
Testing distribution - Worst bias is the 18-bit window at bit 31 - 0.076%
Testing bit 46
Testing collisions ( 32-bit) - Expected 511.9, actual 543 (1.06x) (32)
Testing distribution - Worst bias is the 18-bit window at bit 20 - 0.088%
Testing bit 47
Testing collisions ( 32-bit) - Expected 511.9, actual 547 (1.07x) (36)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.105%
Testing bit 48
Testing collisions ( 32-bit) - Expected 511.9, actual 512 (1.00x) (1)
Testing distribution - Worst bias is the 18-bit window at bit 8 - 0.068%
Testing bit 49
Testing collisions ( 32-bit) - Expected 511.9, actual 505 (0.99x) (-6)
Testing distribution - Worst bias is the 18-bit window at bit 3 - 0.076%
Testing bit 50
Testing collisions ( 32-bit) - Expected 511.9, actual 496 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 0 - 0.071%
Testing bit 51
Testing collisions ( 32-bit) - Expected 511.9, actual 487 (0.95x)
Testing distribution - Worst bias is the 18-bit window at bit 25 - 0.083%
Testing bit 52
Testing collisions ( 32-bit) - Expected 511.9, actual 514 (1.00x) (3)
Testing distribution - Worst bias is the 18-bit window at bit 16 - 0.055%
Testing bit 53
Testing collisions ( 32-bit) - Expected 511.9, actual 492 (0.96x)
Testing distribution - Worst bias is the 18-bit window at bit 18 - 0.049%
Testing bit 54
Testing collisions ( 32-bit) - Expected 511.9, actual 494 (0.97x)
Testing distribution - Worst bias is the 18-bit window at bit 4 - 0.076%
Testing bit 55
Testing collisions ( 32-bit) - Expected 511.9, actual 503 (0.98x) (-8)
Testing distribution - Worst bias is the 17-bit window at bit 30 - 0.044%
Testing bit 56
Testing collisions ( 32-bit) - Expected 511.9, actual 491 (0.96x)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.051%
Testing bit 57
Testing collisions ( 32-bit) - Expected 511.9, actual 502 (0.98x) (-9)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.098%
Testing bit 58
Testing collisions ( 32-bit) - Expected 511.9, actual 524 (1.02x) (13)
Testing distribution - Worst bias is the 18-bit window at bit 18 - 0.064%
Testing bit 59
Testing collisions ( 32-bit) - Expected 511.9, actual 519 (1.01x) (8)
Testing distribution - Worst bias is the 18-bit window at bit 25 - 0.068%
Testing bit 60
Testing collisions ( 32-bit) - Expected 511.9, actual 541 (1.06x) (30)
Testing distribution - Worst bias is the 18-bit window at bit 13 - 0.034%
Testing bit 61
Testing collisions ( 32-bit) - Expected 511.9, actual 472 (0.92x)
Testing distribution - Worst bias is the 18-bit window at bit 10 - 0.065%
Testing bit 62
Testing collisions ( 32-bit) - Expected 511.9, actual 507 (0.99x) (-4)
Testing distribution - Worst bias is the 18-bit window at bit 1 - 0.069%
Testing bit 63
Testing collisions ( 32-bit) - Expected 511.9, actual 482 (0.94x)
Testing distribution - Worst bias is the 18-bit window at bit 22 - 0.077%
Input vcode 0x00000001, Output vcode 0x00000001, Result vcode 0x00000001
Verification value is 0x00000001 - Testing took 350.971248 seconds
[[[ Avalanche Tests ]]]
Testing 24-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.576000%
Testing 32-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.652667%
Testing 40-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.716000%
Testing 48-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.664667%
Testing 56-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.634667%
Testing 64-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.760667%
Testing 72-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.696667%
Testing 80-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.678000%
Testing 96-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.784667%
Testing 112-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.610000%
Testing 128-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.603333%
Testing 160-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.709333%
Testing 512-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.768667%
Testing 1024-bit keys -> 32-bit hashes, 300000 reps.......... worst bias is 0.751333%
[[[ Diff 'Differential' Tests ]]]
Testing 8303632 up-to-5-bit differentials in 64-bit keys -> 32 bit hashes.
1000 reps, 8303632000 total tests, expecting 1.93 random collisions..........
0 total collisions, of which 0 single collisions were ignored
Testing 11017632 up-to-4-bit differentials in 128-bit keys -> 32 bit hashes.
1000 reps, 11017632000 total tests, expecting 2.57 random collisions..........
0 total collisions, of which 0 single collisions were ignored
Testing 2796416 up-to-3-bit differentials in 256-bit keys -> 32 bit hashes.
1000 reps, 2796416000 total tests, expecting 0.65 random collisions..........
0 total collisions, of which 0 single collisions were ignored
[[[ MomentChi2 Tests ]]]
Analyze hashes produced from a series of linearly increasing 32-bit numbers, using a step of 2 ...
Target values to approximate : 1391290.000000 - 686.666667
4 threads starting... done
Popcount 1 stats : 1391274.272774 - 687.214286
Popcount 0 stats : 1391326.086340 - 687.394241
MomentChi2 for bits 1 : 0.180034
MomentChi2 for bits 0 : 0.947719
Derivative stats (transition from 2 consecutive values) :
Popcount 1 stats : 1391337.595605 - 687.344446
Popcount 0 stats : 1391285.423503 - 687.295030
MomentChi2 for deriv b1 : 1.64871
MomentChi2 for deriv b0 : 0.0152437
Great
[[[ Prng Tests ]]]
Skipping PRNG test; it is designed for hashes >= 64-bits
[[[ BIC 'Bit Independence Criteria' Tests ]]]
...........
Max bias 0.005424 - ( 1 : 13, 26)
Input vcode 0x00000001, Output vcode 0x00000001, Result vcode 0x00000001
Verification value is 0x00000001 - Testing took 718.518900 seconds
v9-0004-Assert-that-incremental-fasthash-variants-give-th.patch (application/x-patch)
From eacb3fccb25c10696334def8a8346257958ec3dd Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 15:14:24 +0700
Subject: [PATCH v9 4/6] Assert that incremental fasthash variants give the
same answer as the original
Test that incremental hashing gives the right answer for strings.
Use the initial length only for the init step. Test that
we can ignore the length afterwards, and only use the
presence of the NUL terminator to stop iterating. Assert
that this results in the same hash.
Based on "Use new hash APIs for search path cache" by Jeff Davis,
rebased over v7.
---
src/backend/catalog/namespace.c | 48 ++++++++++++++++++++++++++++++---
1 file changed, 44 insertions(+), 4 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..6bb28aecfc 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +247,51 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
+ const char *buf = key.searchPath;
+ fasthash_state hs;
+
+ // XXX not for commit
+#ifdef USE_ASSERT_CHECKING
int blen = strlen(key.searchPath);
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ uint64 h_orig = fasthash64_orig(buf, blen, key.roleid);
+
+ // check full function that calls incremental interface
+ Assert(fasthash64((const unsigned char *) buf, blen, key.roleid) == h_orig);
+
+ // Test that the chunked interface can give the same answer
+ // when we have the length up front. We would typically use it
+ // when we don't, but let's try to make it as similar as
+ // conveniently possible.
+ fasthash_init(&hs, blen, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while(chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
+ buf += chunk_len;
+ }
+ Assert(fasthash_final64(&hs) == h_orig);
+ buf = key.searchPath; /* reset */
+#endif
+
+ // WIP: maybe roleid should be mixed in normally
+ // WIP: For now fake the length to preserve the internal seed
+ fasthash_init(&hs, 1, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while(chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
+ buf += chunk_len;
+ }
+ return fasthash_final32(&hs);
}
static inline bool
--
2.43.0
v9-0006-Add-optional-tweak-to-finalizer.patch (application/x-patch)
From 3d77e7effda9360315ab4af330839eb82ed97925 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Thu, 14 Dec 2023 19:41:17 +0700
Subject: [PATCH v9 6/6] Add optional tweak to finalizer
For hashing strings, we need to incorporate the length of the
input into the final hash when we call the finalizer. This is
necessary to pass SMHasher.
To reduce the number of places that need to know about this,
it seemed best to make the finalizer a void function, and to
make the 32-bit reducer able to accept any uint64 input.
---
src/backend/catalog/namespace.c | 11 +++++++++--
src/include/common/hashfn_unstable.h | 21 ++++++++++-----------
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 6bb28aecfc..3a3cb8ef70 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -247,6 +247,7 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
+ const char * const start = key.searchPath;
const char *buf = key.searchPath;
fasthash_state hs;
@@ -274,7 +275,10 @@ spcachekey_hash(SearchPathCacheKey key)
fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
buf += chunk_len;
}
- Assert(fasthash_final64(&hs) == h_orig);
+
+ // We passed the length to fasthash_init, so no tweak for assert testing
+ fasthash_final64(&hs, 0);
+ Assert(hs.hash == h_orig);
buf = key.searchPath; /* reset */
#endif
@@ -291,7 +295,10 @@ spcachekey_hash(SearchPathCacheKey key)
fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
buf += chunk_len;
}
- return fasthash_final32(&hs);
+
+ /* pass the length to tweak the final mix */
+ fasthash_final64(&hs, buf - start);
+ return fasthash_reduce32(hs.hash);
}
static inline bool
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index e8ca39fed2..578a4d0a83 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -53,9 +53,9 @@ typedef struct fasthash_state
} fasthash_state;
static inline uint64
-fasthash_mix(uint64 h)
+fasthash_mix(uint64 h, uint64 tweak)
{
- h ^= h >> 23;
+ h ^= (h >> 23) + tweak;
h *= 0x2127599bf4325c37;
h ^= h >> 47;
return h;
@@ -64,7 +64,7 @@ fasthash_mix(uint64 h)
static inline void
fasthash_combine(fasthash_state* hs)
{
- hs->hash ^= fasthash_mix(hs->accum);
+ hs->hash ^= fasthash_mix(hs->accum, 0);
hs->hash *= 0x880355f21e6d1965;
/* reset hash state for next input */
@@ -114,19 +114,18 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
}
-static inline uint64
-fasthash_final64(fasthash_state *hs)
+static inline void
+fasthash_final64(fasthash_state *hs, uint64 tweak)
{
- return fasthash_mix(hs->hash);
+ hs->hash = fasthash_mix(hs->hash, tweak);
}
static inline uint32
-fasthash_final32(fasthash_state *hs)
+fasthash_reduce32(uint64 h)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64 h = fasthash_final64(hs);
return h - (h >> 32);
}
@@ -145,14 +144,14 @@ fasthash64(const unsigned char * k, int len, uint64 seed)
}
fasthash_accum(&hs, k, len);
- return fasthash_final64(&hs);
+ fasthash_final64(&hs, 0);
+ return hs.hash;
}
static inline uint64
fasthash32(const unsigned char * k, int len, uint64 seed)
{
- uint64 h = fasthash64(k, len, seed);
- return h - (h >> 32);
+ return fasthash_reduce32(fasthash64(k, len, seed));
}
--
2.43.0
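To make the new calling convention concrete, here is a minimal sketch (mine, not part of the patch set) of hashing a NUL-terminated string with the 0006 interface. It mirrors the spcachekey_hash() loop in the patch, minus the roleid seeding; hash_cstring is a hypothetical name:

#include "common/hashfn_unstable.h"

static inline uint32
hash_cstring(const char *str, uint64 seed)
{
    fasthash_state hs;
    const char *buf = str;

    /* length is unknown up front, so fake it, as the patch does */
    fasthash_init(&hs, 1, seed);

    while (*buf)
    {
        int     chunk_len = 0;

        while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
            chunk_len++;

        fasthash_accum(&hs, (const unsigned char *) buf, chunk_len);
        buf += chunk_len;
    }

    /* fold the observed length into the final mix via the tweak */
    fasthash_final64(&hs, buf - str);
    return fasthash_reduce32(hs.hash);
}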
v9-0002-Rewrite-fasthash-functions-using-a-homegrown-incr.patch (application/x-patch)
From 4e1c6836894c5bed8315ac52c6f0c1ff5439076a Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 12:11:37 +0700
Subject: [PATCH v9 2/6] Rewrite fasthash functions using a homegrown
incremental interface
The incremental interface will be useful for cases where we don't
know the length up front, such as NUL-terminated strings. First, we
need to validate that this interface can give the same answer
as the original functions when we do know the length. A future
commit will add a temporary assert for testing in CI.
---
src/include/common/hashfn_unstable.h | 161 +++++++++++++++++++++++++--
1 file changed, 153 insertions(+), 8 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a5bf965fa2..fbae7a5522 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -1,3 +1,25 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must have
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
/* The MIT License
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
@@ -23,16 +45,130 @@
SOFTWARE.
*/
-#include "fasthash.h"
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+ uint64 hash;
+} fasthash_state;
+
+static inline uint64
+fasthash_mix(uint64 h)
+{
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state* hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum);
+ hs->hash *= 0x880355f21e6d1965ULL;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+
+ // since we don't know the length for a nul-terminated string
+ // handle some other way -- maybe we can accum the length in
+ // the state and fold it in during the finalizer (cf. xxHash3)
+ hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+}
+
+static inline void
+fasthash_accum(fasthash_state *hs, const unsigned char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8: memcpy(&hs->accum, k, 8);
+ break;
+ case 7: hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6: hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5: hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4: hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3: hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2: hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1: hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+
+static inline uint64
+fasthash_final64(fasthash_state *hs)
+{
+ return fasthash_mix(hs->hash);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64 h = fasthash_final64(hs);
+ return h - (h >> 32);
+}
+
+static inline uint64
+fasthash64(const unsigned char * k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs);
+}
+
+static inline uint64
+fasthash32(const unsigned char * k, int len, uint64 seed)
+{
+ uint64 h = fasthash64(k, len, seed);
+ return h - (h >> 32);
+}
+
+
+// XXX NOT FOR COMMIT
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
-#define mix(h) ({ \
- (h) ^= (h) >> 23; \
- (h) *= 0x2127599bf4325c37ULL; \
- (h) ^= (h) >> 47; })
+static inline uint64_t mix(uint64_t h) {
+ h ^= h >> 23;
+ h *= 0x2127599bf4325c37ULL;
+ h ^= h >> 47;
+ return h;
+}
-uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+static inline
+uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
{
const uint64_t m = 0x880355f21e6d1965ULL;
const uint64_t *pos = (const uint64_t *)buf;
@@ -52,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
switch (len & 7) {
case 7: v ^= (uint64_t)pos2[6] << 48;
+ /* FALLTHROUGH */
case 6: v ^= (uint64_t)pos2[5] << 40;
+ /* FALLTHROUGH */
case 5: v ^= (uint64_t)pos2[4] << 32;
+ /* FALLTHROUGH */
case 4: v ^= (uint64_t)pos2[3] << 24;
+ /* FALLTHROUGH */
case 3: v ^= (uint64_t)pos2[2] << 16;
+ /* FALLTHROUGH */
case 2: v ^= (uint64_t)pos2[1] << 8;
+ /* FALLTHROUGH */
case 1: v ^= (uint64_t)pos2[0];
h ^= mix(v);
h *= m;
@@ -65,11 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
return mix(h);
}
-uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+static inline
+uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed)
{
// the following trick converts the 64-bit hashcode to Fermat
// residue, which shall retain information from both the higher
// and lower parts of hashcode.
- uint64_t h = fasthash64(buf, len, seed);
+ uint64_t h = fasthash64_orig(buf, len, seed);
return h - (h >> 32);
}
+
+#endif /* HASHFN_UNSTABLE_H */
--
2.43.0
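As an aside for reviewers, a property worth keeping in mind when using the 0002 interface (sketch of mine, not from the patches; uses the pre-0006 fasthash_final64() signature): the incremental path reproduces the one-shot wrapper only when the input is fed in full accumulator-sized chunks with a single short tail, because every fasthash_accum() call performs one combine round.

static void
check_incremental_matches_oneshot(void)
{
    const unsigned char key[] = "search_path";  /* 11 bytes + NUL */
    fasthash_state hs;

    fasthash_init(&hs, 11, 0);
    fasthash_accum(&hs, key, 8);        /* "search_p" */
    fasthash_accum(&hs, key + 8, 3);    /* "ath" */

    /* matches the one-shot wrapper; a 5+6 split would not */
    Assert(fasthash_final64(&hs) == fasthash64(key, 11, 0));
}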
v9-0003-Fix-alignment-issue-in-the-original-fastash.patch (application/x-patch)
From 4f875eb84567ccaa31cfc4e01ccd61acb43f9a58 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 10 Dec 2023 21:27:13 +0700
Subject: [PATCH v9 3/6] Fix alignment issue in the original fasthash
Found by UBSan in CI
---
src/include/common/hashfn_unstable.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index fbae7a5522..a167681c86 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -178,9 +178,10 @@ uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
uint64_t v;
while (pos != end) {
- v = *pos++;
+ memcpy(&v, pos, 8);
h ^= mix(v);
h *= m;
+ pos++;
}
pos2 = (const unsigned char*)pos;
--
2.43.0
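For anyone wondering why memcpy is the right fix here: dereferencing a uint64_t pointer into an arbitrary byte buffer is undefined behavior when the address is not 8-byte aligned, which is what UBSan flagged. A generic sketch of the idiom (mine, not from the thread); on targets with unaligned loads, compilers reduce this to a single load instruction:

#include <stdint.h>
#include <string.h>

static inline uint64_t
load_u64(const void *p)
{
    uint64_t    v;

    /* memcpy has no alignment requirement, unlike *(uint64_t *) p */
    memcpy(&v, p, sizeof(v));
    return v;
}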
v9-0005-Remove-ULL.patch (application/x-patch)
From 63b25e788c81bc47342f28e1e3e489fae8c61d26 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Thu, 14 Dec 2023 19:03:12 +0700
Subject: [PATCH v9 5/6] Remove ULL
---
src/include/common/hashfn_unstable.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a167681c86..e8ca39fed2 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -56,7 +56,7 @@ static inline uint64
fasthash_mix(uint64 h)
{
h ^= h >> 23;
- h *= 0x2127599bf4325c37ULL;
+ h *= 0x2127599bf4325c37;
h ^= h >> 47;
return h;
}
@@ -65,7 +65,7 @@ static inline void
fasthash_combine(fasthash_state* hs)
{
hs->hash ^= fasthash_mix(hs->accum);
- hs->hash *= 0x880355f21e6d1965ULL;
+ hs->hash *= 0x880355f21e6d1965;
/* reset hash state for next input */
hs->accum = 0;
@@ -79,7 +79,7 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed)
// since we don't know the length for a nul-terminated string
// handle some other way -- maybe we can accum the length in
// the state and fold it in during the finalizer (cf. xxHash3)
- hs->hash = seed ^ (len * 0x880355f21e6d1965ULL);
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
}
static inline void
@@ -162,7 +162,7 @@ fasthash32(const unsigned char * k, int len, uint64 seed)
// This function is generated using the framework provided.
static inline uint64_t mix(uint64_t h) {
h ^= h >> 23;
- h *= 0x2127599bf4325c37ULL;
+ h *= 0x2127599bf4325c37;
h ^= h >> 47;
return h;
}
@@ -170,7 +170,7 @@ static inline uint64_t mix(uint64_t h) {
static inline
uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed)
{
- const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t m = 0x880355f21e6d1965;
const uint64_t *pos = (const uint64_t *)buf;
const uint64_t *end = pos + (len / 8);
const unsigned char *pos2;
--
2.43.0
v9-0001-Vendor-fasthash.patch
From 6bd53c5ce11a44d39ad1deccc6c84e32b0d9a9fd Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v9 1/6] Vendor fasthash
MIT licensed
---
src/include/common/hashfn_unstable.h | 75 ++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..a5bf965fa2
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,75 @@
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+#include "fasthash.h"
+
+// Compression function for Merkle-Damgard construction.
+// This function is generated using the framework provided.
+#define mix(h) ({ \
+ (h) ^= (h) >> 23; \
+ (h) *= 0x2127599bf4325c37ULL; \
+ (h) ^= (h) >> 47; })
+
+uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
+{
+ const uint64_t m = 0x880355f21e6d1965ULL;
+ const uint64_t *pos = (const uint64_t *)buf;
+ const uint64_t *end = pos + (len / 8);
+ const unsigned char *pos2;
+ uint64_t h = seed ^ (len * m);
+ uint64_t v;
+
+ while (pos != end) {
+ v = *pos++;
+ h ^= mix(v);
+ h *= m;
+ }
+
+ pos2 = (const unsigned char*)pos;
+ v = 0;
+
+ switch (len & 7) {
+ case 7: v ^= (uint64_t)pos2[6] << 48;
+ case 6: v ^= (uint64_t)pos2[5] << 40;
+ case 5: v ^= (uint64_t)pos2[4] << 32;
+ case 4: v ^= (uint64_t)pos2[3] << 24;
+ case 3: v ^= (uint64_t)pos2[2] << 16;
+ case 2: v ^= (uint64_t)pos2[1] << 8;
+ case 1: v ^= (uint64_t)pos2[0];
+ h ^= mix(v);
+ h *= m;
+ }
+
+ return mix(h);
+}
+
+uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
+{
+ // the following trick converts the 64-bit hashcode to Fermat
+ // residue, which shall retain information from both the higher
+ // and lower parts of hashcode.
+ uint64_t h = fasthash64(buf, len, seed);
+ return h - (h >> 32);
+}
--
2.43.0
I wrote:
Updated next steps:
* Add some desperately needed explanatory comments.
There is a draft of this in v10-0001. I also removed the validation
scaffolding and ran pgindent. This could use some review and/or
bikeshedding, in particular on the name hashfn_unstable.h. I also
considered *_volatile.h or *_inmemory.h, but nothing stands out as
more clear.
* Use this in some existing cases where it makes sense.
For now just two:
v10-0002 is Jeff's change to the search path cache, but with the
chunked interface that I found to be faster.
v10-0003 is a copy of something buried in an earlier version: use in
pgstat. Looks nicer, but not yet tested.
Attachments:
v10-0003-Use-fasthash32-for-pgstat_hash_hash_key.patch
From 9999aecbc51780ada4634855727c50e3b85a8f7f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v10 3/3] Use fasthash32 for pgstat_hash_hash_key
Currently this calls the 32-bit Murmur finalizer on the three elements,
then joined with hash_combine(). This is simpler and has better
collision guarantees.
WIP: Make sure performance is at least comparable.
WIP: We may not need the full 32-bit finalizer reducing step.
It would be slightly cheaper to just use fasthash64 and then take
the lower 32 bits.
Discussion: (none yet, buried in a related patchset)
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..ecc46bef04 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const char *) key, size, 0);
}
/*
--
2.43.0
v10-0002-Use-fasthash-for-the-search-path-cache.patch
From c7bd727b24a8935343df6fb24d10948fa6d4d57c Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v10 2/3] Use fasthash for the search path cache
This serves to demonstrate the incremental API, allowing inlined
hash calculation without a strlen call. This brings the general case
performance closer to the optimization done in commit a86c61c9ee.
WIP: roleid should be mixed in normally, unless we have
reason to just use it as a seed.
Jeff Davis, with switch to chunked interface by me
Discussion: https://www.postgresql.org/message-id/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
src/backend/catalog/namespace.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..7fe2fd1fd4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +247,25 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ const char *const start = key.searchPath;
+ const char *buf = key.searchPath;
+ fasthash_state hs;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ /* WIP: maybe roleid should be mixed in normally */
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ /* pass the length to tweak the final mix */
+ return fasthash_final32(&hs, buf - start);
}
static inline bool
--
2.43.0
v10-0001-Add-inlineable-incremental-hash-functions-for-in.patch
From a990c20cab3c293a514b0c5120dfb83a3258e666 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v10 1/3] Add inlineable, incremental hash functions for
in-memory use
A number of places hash NUL-terminated strings. Currently, we need
to call strlen first because hash_bytes needs the length. For short
strings the C library call has a large overhead, and strlen calls
show up prominently in profiles.
Per suggestion from Andres Freund, add hash functions with an
incremental interface. Instead of trying to whack around hash_bytes
while maintaining its current behavior on all platforms, we base
this work on fasthash (MIT licensed) which is simple, faster than
hash_bytes for inputs over 12 bytes long, and also passes the hash
function testing suite SMHasher.
The original functions have been reimplemented using our new
incremental interface to validate that this method will still give the
same answer, provided we have the input length ahead of time. Future
work will use these for some existing uses of simplehash and dynahash.
The new functionality lives in a new header hashfn_unstable.h. The
name implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being endian-independent.
Reviewed (in an earlier version) by Heikki Linnakangas
Discussion: https://www.postgresql.org/message-id/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
src/include/common/hashfn_unstable.h | 213 +++++++++++++++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
2 files changed, 214 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..bf1dbee28d
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,213 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must produce
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ * ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+ uint64 hash;
+} fasthash_state;
+
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit more additional mixing compared to
+ * just taking the lower 32-bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * The following trick converts the 64-bit hashcode to Fermat residue,
+ * which shall retain information from both the higher and lower parts of
+ * hashcode.
+ */
+ return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint64
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d659adbfd6..4038d07458 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3321,6 +3321,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.43.0
On Mon, 2023-12-18 at 13:39 +0700, John Naylor wrote:
For now just two:
v10-0002 is Jeff's change to the search path cache, but with the
chunked interface that I found to be faster.
Did you consider specializing for the case of an aligned pointer? If
it's a string (C string or byte string), it's almost always going to be
aligned, right?
I hacked up a patch (attached). I lost track of which benchmark we're
using to test the performance, but when I test in a loop it seems
substantially faster.
It reads past the NUL byte, but only to the next alignment boundary,
which I think is OK: an aligned 8-byte read stays within one page, so
it can't fault (though I'd need to fix the patch for when
maxalign < 8).
Regards,
Jeff Davis
Attachments:
v10jd-0004-Optimize-hash-function-further.patch
From 055d5cc24404584fd98109fabdcf83348e5c49b4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 18 Dec 2023 16:44:27 -0800
Subject: [PATCH v10jd] Optimize hash function further.
---
src/backend/catalog/namespace.c | 46 +++++++++++++++++++++++++---
src/include/common/hashfn_unstable.h | 9 ++++++
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index c23f46aca3..368a7fabec 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -245,15 +245,44 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
* offers a more convenient API.
*/
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
static inline uint32
-spcachekey_hash(SearchPathCacheKey key)
+cstring_hash_aligned(const char *str, uint64 seed)
+{
+ const char *const start = str;
+ const char *buf = start;
+ int chunk_len = 0;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+
+ Assert(PointerIsAligned(start, uint64));
+ while (!haszero64(*(uint64 *)buf))
+ {
+ fasthash_accum64(&hs, buf);
+ buf += sizeof(uint64);
+ }
+
+ while (buf[chunk_len] != '\0')
+ chunk_len++;
+ fasthash_accum(&hs, buf, chunk_len);
+ buf += chunk_len;
+
+ return fasthash_final32(&hs, buf - start);
+}
+
+static inline uint32
+cstring_hash_unaligned(const char *str, uint64 seed)
{
- const char *const start = key.searchPath;
- const char *buf = key.searchPath;
+ const char *const start = str;
+ const char *buf = str;
fasthash_state hs;
/* WIP: maybe roleid should be mixed in normally */
- fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
while (*buf)
{
int chunk_len = 0;
@@ -269,6 +298,15 @@ spcachekey_hash(SearchPathCacheKey key)
return fasthash_final32(&hs, buf - start);
}
+static inline uint32
+spcachekey_hash(SearchPathCacheKey key)
+{
+ if (PointerIsAligned(key.searchPath, uint64))
+ return cstring_hash_aligned(key.searchPath, key.roleid);
+ else
+ return cstring_hash_unaligned(key.searchPath, key.roleid);
+}
+
static inline bool
spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
{
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index bf1dbee28d..553fab0415 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -105,6 +105,15 @@ fasthash_combine(fasthash_state *hs)
hs->accum = 0;
}
+/* Accumulate 8 bytes from aligned pointer and combine it into the hash */
+static inline void
+fasthash_accum64(fasthash_state *hs, const char *ptr)
+{
+ Assert(PointerIsAligned(ptr, uint64));
+ hs->accum = *(uint64 *)ptr;
+ fasthash_combine(hs);
+}
+
/* Accumulate up to 8 bytes of input and combine it into the hash */
static inline void
fasthash_accum(fasthash_state *hs, const char *k, int len)
--
2.34.1
On Tue, Dec 19, 2023 at 2:32 PM Jeff Davis <pgsql@j-davis.com> wrote:
On Mon, 2023-12-18 at 13:39 +0700, John Naylor wrote:
For now just two:
v10-0002 is Jeff's change to the search path cache, but with the
chunked interface that I found to be faster.
Did you consider specializing for the case of an aligned pointer? If
it's a string (C string or byte string), it's almost always going to be
aligned, right?
That wasn't the next place I thought to look (that would be the strcmp
call), but something like this could be worthwhile.
If we went this far, I'd like to get more use out of it than one call
site. I think a few other places have as their hash key a string along
with other values, so maybe we can pass an initialized hash state for
strings separately from combining in the other values. Dynahash will
still need to deal with truncation, so would need duplicate coding,
but I'm guessing that with the truncation check, an optimization
like you propose is even more worthwhile.
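A sketch of how that might look, using the incremental API from
v10-0001 (fasthash_accum_cstring() here is a hypothetical helper that
hashes a NUL-terminated string and returns its length; it is not in
the posted patches):

static inline uint32
hash_string_plus_oid(const char *name, Oid oid)
{
	fasthash_state hs;
	int			len;

	fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
	len = fasthash_accum_cstring(&hs, name);	/* hypothetical helper */
	hs.accum = oid;				/* combine in the non-string key field */
	fasthash_combine(&hs);
	return fasthash_final32(&hs, len);
}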
I hacked up a patch (attached). I lost track of which benchmark we're
using to test the performance, but when I test in a loop it seems
substantially faster.
That's interesting. Note that there is no need for a new
fasthash_accum64(), since we can do
fasthash_accum(&hs, buf, FH_SIZEOF_ACCUM);
...and the compiler should elide the switch statement.
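That is, with the constant length visible to the compiler, the inlined
switch in fasthash_accum() should reduce to its len == 8 branch,
effectively (a sketch of the expected result, not verified codegen):

	memcpy(&hs.accum, buf, 8);	/* the "case 8" arm */
	fasthash_combine(&hs);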
It reads past the NUL byte, but only to the next alignment boundary,
which I think is OK (though I think I'd need to fix the patch for when
maxalign < 8).
Seems like it, on both accounts.
On Tue, 2023-12-19 at 16:23 +0700, John Naylor wrote:
That wasn't the next place I thought to look (that would be the
strcmp call), but something like this could be worthwhile.
The reason I looked here is that the inner while statement (to find the
chunk size) looked out of place and possibly slow, and there's a
bitwise trick we can use instead.
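For reference, the trick in question is the haszero64() macro from the
patch attached earlier. Rewritten as a function, a sketch of how it
detects a NUL in an 8-byte chunk without a byte-at-a-time loop:

static inline bool
chunk_has_zero_byte(uint64 v)
{
	/*
	 * Subtracting 1 from each byte carries a borrow into the 0x80 bit
	 * only for bytes that were zero; masking with ~v discards bytes
	 * whose high bit was already set.
	 */
	return ((v - UINT64CONST(0x0101010101010101)) &
			~v & UINT64CONST(0x8080808080808080)) != 0;
}

The patch applies this to each aligned 8-byte load to decide when to
fall back to bytewise handling for the chunk containing the
terminator.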
My original test case is a bit too "macro" of a benchmark at this
point, so I'm not sure it's a good guide for these individual micro-
optimizations.
Regards,
Jeff Davis
On Wed, Dec 20, 2023 at 3:23 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Tue, 2023-12-19 at 16:23 +0700, John Naylor wrote:
That wasn't the next place I thought to look (that would be the
strcmp call), but something like this could be worthwhile.
The reason I looked here is that the inner while statement (to find the
chunk size) looked out of place and possibly slow, and there's a
bitwise trick we can use instead.
There are other bit tricks we can use. In v11-0005, just for fun, I
translated a couple more into C from
https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
Attachments:
v11-0003-Use-fasthash32-for-pgstat_hash_hash_key.patch
From 9999aecbc51780ada4634855727c50e3b85a8f7f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v11 3/5] Use fasthash32 for pgstat_hash_hash_key
Currently this calls the 32-bit Murmur finalizer on the three elements,
then joined with hash_combine(). This is simpler and has better
collision guarantees.
WIP: Make sure performance is at least comparable.
WIP: We may not need the full 32-bit finalizer reducing step.
It would be slightly cheaper to just use fasthash64 and then take
the lower 32 bits.
Discussion: (none yet, buried in a related patchset)
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..ecc46bef04 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const char *) key, size, 0);
}
/*
--
2.43.0
v11-0002-Use-fasthash-for-the-search-path-cache.patch
From c7bd727b24a8935343df6fb24d10948fa6d4d57c Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v11 2/5] Use fasthash for the search path cache
This serves to demonstrate the incremental API, allowing inlined
hash calculation without a strlen call. This brings the general case
performance closer to the optimization done in commit a86c61c9ee.
WIP: roleid should be mixed in normally, unless we have
reason to just use it as a seed.
Jeff Davis, with switch to chunked interface by me
Discussion: https://www.postgresql.org/message-id/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
src/backend/catalog/namespace.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..7fe2fd1fd4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +247,25 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ const char *const start = key.searchPath;
+ const char *buf = key.searchPath;
+ fasthash_state hs;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ /* WIP: maybe roleid should be mixed in normally */
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ /* pass the length to tweak the final mix */
+ return fasthash_final32(&hs, buf - start);
}
static inline bool
--
2.43.0
v11-0005-Optimize-tail-with-inspiration-from-OpenBSD.patch
From ec447cc9a9718421883d9619e9dde1b5df3ada9c Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 20 Dec 2023 13:08:46 +0700
Subject: [PATCH v11 5/5] Optimize tail with inspiration from OpenBSD
This only works on little endian, so add guard for that and
for 64-bit. Word-at-a-time NUL checks are not worth the
extra complexity for 32-bit platforms. There is an algorithm
that works for big-endian, but this is all just demonstration
anyway.
---
src/backend/catalog/namespace.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index cb840ce9dd..2046d6788d 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -253,20 +253,36 @@ cstring_hash_aligned(const char *str, uint64 seed)
{
const char *const start = str;
const char *buf = start;
+ // todo: this is now really "remainder"
int chunk_len = 0;
+ uint64 zero_bytes, chunk;
fasthash_state hs;
fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+ // WIP: if this is the common case, we could have an "unlikely" bytewise preamble
Assert(PointerIsAligned(start, uint64));
- while (!haszero64(*(uint64 *)buf))
+ while (true)
{
+ chunk = *(uint64 *)buf;
+ zero_bytes = (chunk - 0x0101010101010101UL) & 0x8080808080808080UL;
+
+ // WIP: this is from OpenBSD strlen -- the extra branch is probably not worth it for short strings
+ if (zero_bytes)
+ {
+ // only needed if the input can have the high bit set
+ zero_bytes &= ~chunk;
+ if (zero_bytes)
+ break;
+ }
+ // WIP: since we have the chunk already, maybe just combine it directly?
fasthash_accum64(&hs, buf);
buf += sizeof(uint64);
}
- while (buf[chunk_len] != '\0')
- chunk_len++;
+ // XXX this only works for little endian machines. See
+ // https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
+ chunk_len = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
fasthash_accum(&hs, buf, chunk_len);
buf += chunk_len;
@@ -300,9 +316,11 @@ cstring_hash_unaligned(const char *str, uint64 seed)
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
+#if ((SIZEOF_VOIDP == 8) && !defined(WORDS_BIGENDIAN))
if (PointerIsAligned(key.searchPath, uint64))
return cstring_hash_aligned(key.searchPath, key.roleid);
else
+#endif
return cstring_hash_unaligned(key.searchPath, key.roleid);
}
--
2.43.0
v11-0001-Add-inlineable-incremental-hash-functions-for-in.patch
From a990c20cab3c293a514b0c5120dfb83a3258e666 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v11 1/5] Add inlineable, incremental hash functions for
in-memory use
A number of places hash NUL-terminated strings. Currently, we need
to call strlen first because hash_bytes needs the length. For short
strings the C library call has a large overhead, and strlen calls
show up prominently in profiles.
Per suggestion from Andres Freund, add hash functions with an
incremental interface. Instead of trying to whack around hash_bytes
while maintaining its current behavior on all platforms, we base
this work on fasthash (MIT licensed) which is simple, faster than
hash_bytes for inputs over 12 bytes long, and also passes the hash
function testing suite SMHasher.
The original functions have been reimplemented using our new
incremental interface to validate that this method will still give the
same answer, provided we have the input length ahead of time. Future
work will use these for some existing uses of simplehash and dynahash.
The new functionality lives in a new header hashfn_unstable.h. The
name implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being endian-independent.
Reviewed (in an earlier version) by Heikki Linnakangas
Discussion: https://www.postgresql.org/message-id/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
src/include/common/hashfn_unstable.h | 213 +++++++++++++++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
2 files changed, 214 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..bf1dbee28d
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,213 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must produce
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ * ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+ uint64 hash;
+} fasthash_state;
+
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit more additional mixing compared to
+ * just taking the lower 32-bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * The following trick converts the 64-bit hashcode to Fermat residue,
+ * which shall retain information from both the higher and lower parts of
+ * hashcode.
+ */
+ return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint64
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d659adbfd6..4038d07458 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3321,6 +3321,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.43.0
v11-0004-Jeff-Davis-v10jd-0004.patch
From d5ec732a544ad6f6de0a42622d1656003b3dc351 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 20 Dec 2023 11:40:11 +0700
Subject: [PATCH v11 4/5] Jeff Davis v10jd-0004
---
src/backend/catalog/namespace.c | 46 +++++++++++++++++++++++++---
src/include/common/hashfn_unstable.h | 9 ++++++
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 7fe2fd1fd4..cb840ce9dd 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -244,15 +244,44 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
* to initialize a key, and also offers a more convenient API.
*/
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
static inline uint32
-spcachekey_hash(SearchPathCacheKey key)
+cstring_hash_aligned(const char *str, uint64 seed)
+{
+ const char *const start = str;
+ const char *buf = start;
+ int chunk_len = 0;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+
+ Assert(PointerIsAligned(start, uint64));
+ while (!haszero64(*(uint64 *)buf))
+ {
+ fasthash_accum64(&hs, buf);
+ buf += sizeof(uint64);
+ }
+
+ while (buf[chunk_len] != '\0')
+ chunk_len++;
+ fasthash_accum(&hs, buf, chunk_len);
+ buf += chunk_len;
+
+ return fasthash_final32(&hs, buf - start);
+}
+
+static inline uint32
+cstring_hash_unaligned(const char *str, uint64 seed)
{
- const char *const start = key.searchPath;
- const char *buf = key.searchPath;
+ const char *const start = str;
+ const char *buf = str;
fasthash_state hs;
/* WIP: maybe roleid should be mixed in normally */
- fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
while (*buf)
{
int chunk_len = 0;
@@ -268,6 +297,15 @@ spcachekey_hash(SearchPathCacheKey key)
return fasthash_final32(&hs, buf - start);
}
+static inline uint32
+spcachekey_hash(SearchPathCacheKey key)
+{
+ if (PointerIsAligned(key.searchPath, uint64))
+ return cstring_hash_aligned(key.searchPath, key.roleid);
+ else
+ return cstring_hash_unaligned(key.searchPath, key.roleid);
+}
+
static inline bool
spcachekey_equal(SearchPathCacheKey a, SearchPathCacheKey b)
{
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index bf1dbee28d..553fab0415 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -105,6 +105,15 @@ fasthash_combine(fasthash_state *hs)
hs->accum = 0;
}
+/* Accumulate 8 bytes from aligned pointer and combine it into the hash */
+static inline void
+fasthash_accum64(fasthash_state *hs, const char *ptr)
+{
+ Assert(PointerIsAligned(ptr, uint64));
+ hs->accum = *(uint64 *)ptr;
+ fasthash_combine(hs);
+}
+
/* Accumulate up to 8 bytes of input and combine it into the hash */
static inline void
fasthash_accum(fasthash_state *hs, const char *k, int len)
--
2.43.0
On Wed, Dec 20, 2023 at 1:48 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Wed, Dec 20, 2023 at 3:23 AM Jeff Davis <pgsql@j-davis.com> wrote:
The reason I looked here is that the inner while statement (to find the
chunk size) looked out of place and possibly slow, and there's a
bitwise trick we can use instead.
There are other bit tricks we can use. In v11-0005, just for fun, I
translated a couple more into C from
https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
I wanted to see if this gets us anything, so I ran a couple of microbenchmarks.
0001-0003 are the same as earlier.
0004 takes Jeff's idea and adds in an optimization from NetBSD's
strlen (I said OpenBSD earlier, but it goes back further). I added
stub code to simulate big-endian when requested at compile time, but a
later patch removes it. Since it benched well, I made the extra effort
to generalize it for other callers. After adding to the hash state, it
returns the length so the caller can pass it to the finalizer.
0005 is the benchmark (not for commit) -- I took the parser keyword
list and added enough padding to make every string aligned when the
whole thing is copied to an alloc'd area.
Each of the bench_*.sql files named below just runs the
similarly-named function, all with the same argument, e.g. "select *
from bench_pgstat_hash_fh(100000);", so they are not attached.
Strings:
-- strlen + hash_bytes
pgbench -n -T 20 -f bench_hash_bytes.sql -M prepared | grep latency
latency average = 1036.732 ms
-- word-at-a-time hashing, with bytewise lookahead
pgbench -n -T 20 -f bench_cstr_unaligned.sql -M prepared | grep latency
latency average = 664.632 ms
-- word-at-a-time for both hashing and lookahead (Jeff's aligned
coding plus a technique from NetBSD strlen)
pgbench -n -T 20 -f bench_cstr_aligned.sql -M prepared | grep latency
latency average = 436.701 ms
So, the fully optimized aligned case is worth it if it's convenient.
0006 adds a byteswap for big-endian so we can reuse little endian
coding for the lookahead.
0007 - I also wanted to put numbers to 0003 (pgstat hash). While the
motivation for that was cleanup, I had a hunch it would shave cycles
and take up less binary space. It does on both accounts:
-- 3x murmur + hash_combine
pgbench -n -T 20 -f bench_pgstat_orig.sql -M prepared | grep latency
latency average = 333.540 ms
-- fasthash32 (simple call, no state setup and final needed for a single value)
pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency
latency average = 277.591 ms
0008 - We can optimize the tail load when it's 4 bytes -- to save
loads, shifts, and OR's. My compiler can't figure this out for the
pgstat hash, with its fixed 4-byte tail. It's pretty simple and should
help other cases:
pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency
latency average = 226.113 ms
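A sketch of that 4-byte tail case (hypothetical helper name; 0008
itself may differ in detail):

static inline void
fasthash_accum_tail4(fasthash_state *hs, const char *k)
{
	uint32		lower_four;

	/* one 4-byte load instead of four byte loads plus shifts and ORs */
	memcpy(&lower_four, k, sizeof(lower_four));
	hs->accum |= lower_four;
	fasthash_combine(hs);
}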
Attachments:
v11-0005-Add-benchmark-for-hashing-C-strings.patch
From 778e3bdfc761dace149cd6c136e4c2847f793c61 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 24 Dec 2023 09:46:44 +0700
Subject: [PATCH v11 5/8] Add benchmark for hashing C strings
---
contrib/bench_hash/Makefile | 23 +
contrib/bench_hash/aligned_keywords.h | 991 +++++++++++++++++++++++++
contrib/bench_hash/bench_hash--1.0.sql | 21 +
contrib/bench_hash/bench_hash.c | 103 +++
contrib/bench_hash/bench_hash.control | 5 +
contrib/bench_hash/meson.build | 19 +
contrib/meson.build | 1 +
7 files changed, 1163 insertions(+)
create mode 100644 contrib/bench_hash/Makefile
create mode 100644 contrib/bench_hash/aligned_keywords.h
create mode 100644 contrib/bench_hash/bench_hash--1.0.sql
create mode 100644 contrib/bench_hash/bench_hash.c
create mode 100644 contrib/bench_hash/bench_hash.control
create mode 100644 contrib/bench_hash/meson.build
diff --git a/contrib/bench_hash/Makefile b/contrib/bench_hash/Makefile
new file mode 100644
index 0000000000..5327080376
--- /dev/null
+++ b/contrib/bench_hash/Makefile
@@ -0,0 +1,23 @@
+# contrib/bench_hash/Makefile
+
+MODULE_big = bench_hash
+OBJS = \
+ $(WIN32RES) \
+ bench_hash.o
+PGFILEDESC = "bench_hash - benchmarks for in-memory hash functions"
+
+EXTENSION = bench_hash
+DATA = bench_hash--1.0.sql
+
+# no regression tests; this module is for benchmarking only
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/bench_hash
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/bench_hash/aligned_keywords.h b/contrib/bench_hash/aligned_keywords.h
new file mode 100644
index 0000000000..c2bd67c856
--- /dev/null
+++ b/contrib/bench_hash/aligned_keywords.h
@@ -0,0 +1,991 @@
+/* created by copying from kwlist_d.h with this patch:
+
+--- a/src/tools/gen_keywordlist.pl
++++ b/src/tools/gen_keywordlist.pl
+@@ -97,7 +97,9 @@ while (<$kif>)
+ {
+ if (/^PG_KEYWORD\("(\w+)"/)
+ {
+- push @keywords, $1;
++ my $len = length($1) + 1;
++ my $aligned = $1 . "\\0" . "_" x ( ($len % 8) == 0 ? 0 : (8-($len % 8)) );
++ push @keywords, $aligned;
+ }
+ }
+
+@@ -127,7 +129,7 @@ for my $i (0 .. $#keywords - 1)
+ # Emit the string containing all the keywords.
+
+ printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
+-print $kwdef join qq|\\0"\n\t"|, @keywords;
++print $kwdef join qq|"\n\t"|, @keywords;
+ print $kwdef qq|";\n\n|;
+
+ # Emit an array of numerical offsets which will be used to index into the
+@@ -145,7 +147,7 @@ foreach my $name (@keywords)
+
+ # Calculate the cumulative offset of the next keyword,
+ # taking into account the null terminator.
+- $offset += $this_length + 1;
++ $offset += $this_length -1;
+
+ # Update max keyword length.
+ $max_len = $this_length if $max_len < $this_length;
+
+*/
+
+
+static const char aligned_words[] =
+ "abort\0__"
+ "absent\0_"
+ "absolute\0_______"
+ "access\0_"
+ "action\0_"
+ "add\0____"
+ "admin\0__"
+ "after\0__"
+ "aggregate\0______"
+ "all\0____"
+ "also\0___"
+ "alter\0__"
+ "always\0_"
+ "analyse\0"
+ "analyze\0"
+ "and\0____"
+ "any\0____"
+ "array\0__"
+ "as\0_____"
+ "asc\0____"
+ "asensitive\0_____"
+ "assertion\0______"
+ "assignment\0_____"
+ "asymmetric\0_____"
+ "at\0_____"
+ "atomic\0_"
+ "attach\0_"
+ "attribute\0______"
+ "authorization\0__"
+ "backward\0_______"
+ "before\0_"
+ "begin\0__"
+ "between\0"
+ "bigint\0_"
+ "binary\0_"
+ "bit\0____"
+ "boolean\0"
+ "both\0___"
+ "breadth\0"
+ "by\0_____"
+ "cache\0__"
+ "call\0___"
+ "called\0_"
+ "cascade\0"
+ "cascaded\0_______"
+ "case\0___"
+ "cast\0___"
+ "catalog\0"
+ "chain\0__"
+ "char\0___"
+ "character\0______"
+ "characteristics\0"
+ "check\0__"
+ "checkpoint\0_____"
+ "class\0__"
+ "close\0__"
+ "cluster\0"
+ "coalesce\0_______"
+ "collate\0"
+ "collation\0______"
+ "column\0_"
+ "columns\0"
+ "comment\0"
+ "comments\0_______"
+ "commit\0_"
+ "committed\0______"
+ "compression\0____"
+ "concurrently\0___"
+ "configuration\0__"
+ "conflict\0_______"
+ "connection\0_____"
+ "constraint\0_____"
+ "constraints\0____"
+ "content\0"
+ "continue\0_______"
+ "conversion\0_____"
+ "copy\0___"
+ "cost\0___"
+ "create\0_"
+ "cross\0__"
+ "csv\0____"
+ "cube\0___"
+ "current\0"
+ "current_catalog\0"
+ "current_date\0___"
+ "current_role\0___"
+ "current_schema\0_"
+ "current_time\0___"
+ "current_timestamp\0______"
+ "current_user\0___"
+ "cursor\0_"
+ "cycle\0__"
+ "data\0___"
+ "database\0_______"
+ "day\0____"
+ "deallocate\0_____"
+ "dec\0____"
+ "decimal\0"
+ "declare\0"
+ "default\0"
+ "defaults\0_______"
+ "deferrable\0_____"
+ "deferred\0_______"
+ "definer\0"
+ "delete\0_"
+ "delimiter\0______"
+ "delimiters\0_____"
+ "depends\0"
+ "depth\0__"
+ "desc\0___"
+ "detach\0_"
+ "dictionary\0_____"
+ "disable\0"
+ "discard\0"
+ "distinct\0_______"
+ "do\0_____"
+ "document\0_______"
+ "domain\0_"
+ "double\0_"
+ "drop\0___"
+ "each\0___"
+ "else\0___"
+ "enable\0_"
+ "encoding\0_______"
+ "encrypted\0______"
+ "end\0____"
+ "enum\0___"
+ "escape\0_"
+ "event\0__"
+ "except\0_"
+ "exclude\0"
+ "excluding\0______"
+ "exclusive\0______"
+ "execute\0"
+ "exists\0_"
+ "explain\0"
+ "expression\0_____"
+ "extension\0______"
+ "external\0_______"
+ "extract\0"
+ "false\0__"
+ "family\0_"
+ "fetch\0__"
+ "filter\0_"
+ "finalize\0_______"
+ "first\0__"
+ "float\0__"
+ "following\0______"
+ "for\0____"
+ "force\0__"
+ "foreign\0"
+ "format\0_"
+ "forward\0"
+ "freeze\0_"
+ "from\0___"
+ "full\0___"
+ "function\0_______"
+ "functions\0______"
+ "generated\0______"
+ "global\0_"
+ "grant\0__"
+ "granted\0"
+ "greatest\0_______"
+ "group\0__"
+ "grouping\0_______"
+ "groups\0_"
+ "handler\0"
+ "having\0_"
+ "header\0_"
+ "hold\0___"
+ "hour\0___"
+ "identity\0_______"
+ "if\0_____"
+ "ilike\0__"
+ "immediate\0______"
+ "immutable\0______"
+ "implicit\0_______"
+ "import\0_"
+ "in\0_____"
+ "include\0"
+ "including\0______"
+ "increment\0______"
+ "indent\0_"
+ "index\0__"
+ "indexes\0"
+ "inherit\0"
+ "inherits\0_______"
+ "initially\0______"
+ "inline\0_"
+ "inner\0__"
+ "inout\0__"
+ "input\0__"
+ "insensitive\0____"
+ "insert\0_"
+ "instead\0"
+ "int\0____"
+ "integer\0"
+ "intersect\0______"
+ "interval\0_______"
+ "into\0___"
+ "invoker\0"
+ "is\0_____"
+ "isnull\0_"
+ "isolation\0______"
+ "join\0___"
+ "json\0___"
+ "json_array\0_____"
+ "json_arrayagg\0__"
+ "json_object\0____"
+ "json_objectagg\0_"
+ "json_scalar\0____"
+ "json_serialize\0_"
+ "key\0____"
+ "keys\0___"
+ "label\0__"
+ "language\0_______"
+ "large\0__"
+ "last\0___"
+ "lateral\0"
+ "leading\0"
+ "leakproof\0______"
+ "least\0__"
+ "left\0___"
+ "level\0__"
+ "like\0___"
+ "limit\0__"
+ "listen\0_"
+ "load\0___"
+ "local\0__"
+ "localtime\0______"
+ "localtimestamp\0_"
+ "location\0_______"
+ "lock\0___"
+ "locked\0_"
+ "logged\0_"
+ "mapping\0"
+ "match\0__"
+ "matched\0"
+ "materialized\0___"
+ "maxvalue\0_______"
+ "merge\0__"
+ "method\0_"
+ "minute\0_"
+ "minvalue\0_______"
+ "mode\0___"
+ "month\0__"
+ "move\0___"
+ "name\0___"
+ "names\0__"
+ "national\0_______"
+ "natural\0"
+ "nchar\0__"
+ "new\0____"
+ "next\0___"
+ "nfc\0____"
+ "nfd\0____"
+ "nfkc\0___"
+ "nfkd\0___"
+ "no\0_____"
+ "none\0___"
+ "normalize\0______"
+ "normalized\0_____"
+ "not\0____"
+ "nothing\0"
+ "notify\0_"
+ "notnull\0"
+ "nowait\0_"
+ "null\0___"
+ "nullif\0_"
+ "nulls\0__"
+ "numeric\0"
+ "object\0_"
+ "of\0_____"
+ "off\0____"
+ "offset\0_"
+ "oids\0___"
+ "old\0____"
+ "on\0_____"
+ "only\0___"
+ "operator\0_______"
+ "option\0_"
+ "options\0"
+ "or\0_____"
+ "order\0__"
+ "ordinality\0_____"
+ "others\0_"
+ "out\0____"
+ "outer\0__"
+ "over\0___"
+ "overlaps\0_______"
+ "overlay\0"
+ "overriding\0_____"
+ "owned\0__"
+ "owner\0__"
+ "parallel\0_______"
+ "parameter\0______"
+ "parser\0_"
+ "partial\0"
+ "partition\0______"
+ "passing\0"
+ "password\0_______"
+ "placing\0"
+ "plans\0__"
+ "policy\0_"
+ "position\0_______"
+ "preceding\0______"
+ "precision\0______"
+ "prepare\0"
+ "prepared\0_______"
+ "preserve\0_______"
+ "primary\0"
+ "prior\0__"
+ "privileges\0_____"
+ "procedural\0_____"
+ "procedure\0______"
+ "procedures\0_____"
+ "program\0"
+ "publication\0____"
+ "quote\0__"
+ "range\0__"
+ "read\0___"
+ "real\0___"
+ "reassign\0_______"
+ "recheck\0"
+ "recursive\0______"
+ "ref\0____"
+ "references\0_____"
+ "referencing\0____"
+ "refresh\0"
+ "reindex\0"
+ "relative\0_______"
+ "release\0"
+ "rename\0_"
+ "repeatable\0_____"
+ "replace\0"
+ "replica\0"
+ "reset\0__"
+ "restart\0"
+ "restrict\0_______"
+ "return\0_"
+ "returning\0______"
+ "returns\0"
+ "revoke\0_"
+ "right\0__"
+ "role\0___"
+ "rollback\0_______"
+ "rollup\0_"
+ "routine\0"
+ "routines\0_______"
+ "row\0____"
+ "rows\0___"
+ "rule\0___"
+ "savepoint\0______"
+ "scalar\0_"
+ "schema\0_"
+ "schemas\0"
+ "scroll\0_"
+ "search\0_"
+ "second\0_"
+ "security\0_______"
+ "select\0_"
+ "sequence\0_______"
+ "sequences\0______"
+ "serializable\0___"
+ "server\0_"
+ "session\0"
+ "session_user\0___"
+ "set\0____"
+ "setof\0__"
+ "sets\0___"
+ "share\0__"
+ "show\0___"
+ "similar\0"
+ "simple\0_"
+ "skip\0___"
+ "smallint\0_______"
+ "snapshot\0_______"
+ "some\0___"
+ "sql\0____"
+ "stable\0_"
+ "standalone\0_____"
+ "start\0__"
+ "statement\0______"
+ "statistics\0_____"
+ "stdin\0__"
+ "stdout\0_"
+ "storage\0"
+ "stored\0_"
+ "strict\0_"
+ "strip\0__"
+ "subscription\0___"
+ "substring\0______"
+ "support\0"
+ "symmetric\0______"
+ "sysid\0__"
+ "system\0_"
+ "system_user\0____"
+ "table\0__"
+ "tables\0_"
+ "tablesample\0____"
+ "tablespace\0_____"
+ "temp\0___"
+ "template\0_______"
+ "temporary\0______"
+ "text\0___"
+ "then\0___"
+ "ties\0___"
+ "time\0___"
+ "timestamp\0______"
+ "to\0_____"
+ "trailing\0_______"
+ "transaction\0____"
+ "transform\0______"
+ "treat\0__"
+ "trigger\0"
+ "trim\0___"
+ "true\0___"
+ "truncate\0_______"
+ "trusted\0"
+ "type\0___"
+ "types\0__"
+ "uescape\0"
+ "unbounded\0______"
+ "uncommitted\0____"
+ "unencrypted\0____"
+ "union\0__"
+ "unique\0_"
+ "unknown\0"
+ "unlisten\0_______"
+ "unlogged\0_______"
+ "until\0__"
+ "update\0_"
+ "user\0___"
+ "using\0__"
+ "vacuum\0_"
+ "valid\0__"
+ "validate\0_______"
+ "validator\0______"
+ "value\0__"
+ "values\0_"
+ "varchar\0"
+ "variadic\0_______"
+ "varying\0"
+ "verbose\0"
+ "version\0"
+ "view\0___"
+ "views\0__"
+ "volatile\0_______"
+ "when\0___"
+ "where\0__"
+ "whitespace\0_____"
+ "window\0_"
+ "with\0___"
+ "within\0_"
+ "without\0"
+ "work\0___"
+ "wrapper\0"
+ "write\0__"
+ "xml\0____"
+ "xmlattributes\0__"
+ "xmlconcat\0______"
+ "xmlelement\0_____"
+ "xmlexists\0______"
+ "xmlforest\0______"
+ "xmlnamespaces\0__"
+ "xmlparse\0_______"
+ "xmlpi\0__"
+ "xmlroot\0"
+ "xmlserialize\0___"
+ "xmltable\0_______"
+ "year\0___"
+ "yes\0____"
+ "zone\0___";
+
+static const uint16 word_offsets[] = {
+ 0,
+ 8,
+ 16,
+ 32,
+ 40,
+ 48,
+ 56,
+ 64,
+ 72,
+ 88,
+ 96,
+ 104,
+ 112,
+ 120,
+ 128,
+ 136,
+ 144,
+ 152,
+ 160,
+ 168,
+ 176,
+ 192,
+ 208,
+ 224,
+ 240,
+ 248,
+ 256,
+ 264,
+ 280,
+ 296,
+ 312,
+ 320,
+ 328,
+ 336,
+ 344,
+ 352,
+ 360,
+ 368,
+ 376,
+ 384,
+ 392,
+ 400,
+ 408,
+ 416,
+ 424,
+ 440,
+ 448,
+ 456,
+ 464,
+ 472,
+ 480,
+ 496,
+ 512,
+ 520,
+ 536,
+ 544,
+ 552,
+ 560,
+ 576,
+ 584,
+ 600,
+ 608,
+ 616,
+ 624,
+ 640,
+ 648,
+ 664,
+ 680,
+ 696,
+ 712,
+ 728,
+ 744,
+ 760,
+ 776,
+ 784,
+ 800,
+ 816,
+ 824,
+ 832,
+ 840,
+ 848,
+ 856,
+ 864,
+ 872,
+ 888,
+ 904,
+ 920,
+ 936,
+ 952,
+ 976,
+ 992,
+ 1000,
+ 1008,
+ 1016,
+ 1032,
+ 1040,
+ 1056,
+ 1064,
+ 1072,
+ 1080,
+ 1088,
+ 1104,
+ 1120,
+ 1136,
+ 1144,
+ 1152,
+ 1168,
+ 1184,
+ 1192,
+ 1200,
+ 1208,
+ 1216,
+ 1232,
+ 1240,
+ 1248,
+ 1264,
+ 1272,
+ 1288,
+ 1296,
+ 1304,
+ 1312,
+ 1320,
+ 1328,
+ 1336,
+ 1352,
+ 1368,
+ 1376,
+ 1384,
+ 1392,
+ 1400,
+ 1408,
+ 1416,
+ 1432,
+ 1448,
+ 1456,
+ 1464,
+ 1472,
+ 1488,
+ 1504,
+ 1520,
+ 1528,
+ 1536,
+ 1544,
+ 1552,
+ 1560,
+ 1576,
+ 1584,
+ 1592,
+ 1608,
+ 1616,
+ 1624,
+ 1632,
+ 1640,
+ 1648,
+ 1656,
+ 1664,
+ 1672,
+ 1688,
+ 1704,
+ 1720,
+ 1728,
+ 1736,
+ 1744,
+ 1760,
+ 1768,
+ 1784,
+ 1792,
+ 1800,
+ 1808,
+ 1816,
+ 1824,
+ 1832,
+ 1848,
+ 1856,
+ 1864,
+ 1880,
+ 1896,
+ 1912,
+ 1920,
+ 1928,
+ 1936,
+ 1952,
+ 1968,
+ 1976,
+ 1984,
+ 1992,
+ 2000,
+ 2016,
+ 2032,
+ 2040,
+ 2048,
+ 2056,
+ 2064,
+ 2080,
+ 2088,
+ 2096,
+ 2104,
+ 2112,
+ 2128,
+ 2144,
+ 2152,
+ 2160,
+ 2168,
+ 2176,
+ 2192,
+ 2200,
+ 2208,
+ 2224,
+ 2240,
+ 2256,
+ 2272,
+ 2288,
+ 2304,
+ 2312,
+ 2320,
+ 2328,
+ 2344,
+ 2352,
+ 2360,
+ 2368,
+ 2376,
+ 2392,
+ 2400,
+ 2408,
+ 2416,
+ 2424,
+ 2432,
+ 2440,
+ 2448,
+ 2456,
+ 2472,
+ 2488,
+ 2504,
+ 2512,
+ 2520,
+ 2528,
+ 2536,
+ 2544,
+ 2552,
+ 2568,
+ 2584,
+ 2592,
+ 2600,
+ 2608,
+ 2624,
+ 2632,
+ 2640,
+ 2648,
+ 2656,
+ 2664,
+ 2680,
+ 2688,
+ 2696,
+ 2704,
+ 2712,
+ 2720,
+ 2728,
+ 2736,
+ 2744,
+ 2752,
+ 2760,
+ 2776,
+ 2792,
+ 2800,
+ 2808,
+ 2816,
+ 2824,
+ 2832,
+ 2840,
+ 2848,
+ 2856,
+ 2864,
+ 2872,
+ 2880,
+ 2888,
+ 2896,
+ 2904,
+ 2912,
+ 2920,
+ 2928,
+ 2944,
+ 2952,
+ 2960,
+ 2968,
+ 2976,
+ 2992,
+ 3000,
+ 3008,
+ 3016,
+ 3024,
+ 3040,
+ 3048,
+ 3064,
+ 3072,
+ 3080,
+ 3096,
+ 3112,
+ 3120,
+ 3128,
+ 3144,
+ 3152,
+ 3168,
+ 3176,
+ 3184,
+ 3192,
+ 3208,
+ 3224,
+ 3240,
+ 3248,
+ 3264,
+ 3280,
+ 3288,
+ 3296,
+ 3312,
+ 3328,
+ 3344,
+ 3360,
+ 3368,
+ 3384,
+ 3392,
+ 3400,
+ 3408,
+ 3416,
+ 3432,
+ 3440,
+ 3456,
+ 3464,
+ 3480,
+ 3496,
+ 3504,
+ 3512,
+ 3528,
+ 3536,
+ 3544,
+ 3560,
+ 3568,
+ 3576,
+ 3584,
+ 3592,
+ 3608,
+ 3616,
+ 3632,
+ 3640,
+ 3648,
+ 3656,
+ 3664,
+ 3680,
+ 3688,
+ 3696,
+ 3712,
+ 3720,
+ 3728,
+ 3736,
+ 3752,
+ 3760,
+ 3768,
+ 3776,
+ 3784,
+ 3792,
+ 3800,
+ 3816,
+ 3824,
+ 3840,
+ 3856,
+ 3872,
+ 3880,
+ 3888,
+ 3904,
+ 3912,
+ 3920,
+ 3928,
+ 3936,
+ 3944,
+ 3952,
+ 3960,
+ 3968,
+ 3984,
+ 4000,
+ 4008,
+ 4016,
+ 4024,
+ 4040,
+ 4048,
+ 4064,
+ 4080,
+ 4088,
+ 4096,
+ 4104,
+ 4112,
+ 4120,
+ 4128,
+ 4144,
+ 4160,
+ 4168,
+ 4184,
+ 4192,
+ 4200,
+ 4216,
+ 4224,
+ 4232,
+ 4248,
+ 4264,
+ 4272,
+ 4288,
+ 4304,
+ 4312,
+ 4320,
+ 4328,
+ 4336,
+ 4352,
+ 4360,
+ 4376,
+ 4392,
+ 4408,
+ 4416,
+ 4424,
+ 4432,
+ 4440,
+ 4456,
+ 4464,
+ 4472,
+ 4480,
+ 4488,
+ 4504,
+ 4520,
+ 4536,
+ 4544,
+ 4552,
+ 4560,
+ 4576,
+ 4592,
+ 4600,
+ 4608,
+ 4616,
+ 4624,
+ 4632,
+ 4640,
+ 4656,
+ 4672,
+ 4680,
+ 4688,
+ 4696,
+ 4712,
+ 4720,
+ 4728,
+ 4736,
+ 4744,
+ 4752,
+ 4768,
+ 4776,
+ 4784,
+ 4800,
+ 4808,
+ 4816,
+ 4824,
+ 4832,
+ 4840,
+ 4848,
+ 4856,
+ 4864,
+ 4880,
+ 4896,
+ 4912,
+ 4928,
+ 4944,
+ 4960,
+ 4976,
+ 4984,
+ 4992,
+ 5008,
+ 5024,
+ 5032,
+ 5040,
+};
+
+
+#define SCANKEYWORDS_NUM_KEYWORDS 473
+
diff --git a/contrib/bench_hash/bench_hash--1.0.sql b/contrib/bench_hash/bench_hash--1.0.sql
new file mode 100644
index 0000000000..b3a5747432
--- /dev/null
+++ b/contrib/bench_hash/bench_hash--1.0.sql
@@ -0,0 +1,21 @@
+/* src/test/modules/bench_hash/bench_hash--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bench_hash" to load this file. \quit
+
+CREATE FUNCTION bench_string_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+
+CREATE FUNCTION bench_cstring_hash_unaligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_cstring_hash_aligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
diff --git a/contrib/bench_hash/bench_hash.c b/contrib/bench_hash/bench_hash.c
new file mode 100644
index 0000000000..9c9dba93f0
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.c
@@ -0,0 +1,103 @@
+/*-------------------------------------------------------------------------
+ *
+ * bench_hash.c
+ *
+ * Copyright (c) 2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/bench_hash/bench_hash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+#include "aligned_keywords.h"
+
+#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+
+PG_FUNCTION_INFO_V1(bench_string_hash);
+Datum
+bench_string_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len = strlen(&aligned_words[idx]);
+ hash += hash_bytes((const unsigned char *) &aligned_words[idx], s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_unaligned);
+Datum
+bench_cstring_hash_unaligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_unaligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_aligned);
+Datum
+bench_cstring_hash_aligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_aligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
diff --git a/contrib/bench_hash/bench_hash.control b/contrib/bench_hash/bench_hash.control
new file mode 100644
index 0000000000..ffc63858d2
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.control
@@ -0,0 +1,5 @@
+# bench_hash extension
+comment = 'benchmark some hash functions'
+default_version = '1.0'
+module_pathname = '$libdir/bench_hash'
+relocatable = true
diff --git a/contrib/bench_hash/meson.build b/contrib/bench_hash/meson.build
new file mode 100644
index 0000000000..f8d88d8b5c
--- /dev/null
+++ b/contrib/bench_hash/meson.build
@@ -0,0 +1,19 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+bench_hash_sources = files(
+ 'bench_hash.c',
+)
+
+bench_hash = shared_module('bench_hash',
+ bench_hash_sources,
+# link_with: pgport_srv,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += bench_hash
+
+install_data(
+ 'bench_hash.control',
+ 'bench_hash--1.0.sql',
+ kwargs: contrib_data_args,
+)
+
diff --git a/contrib/meson.build b/contrib/meson.build
index c0b267c632..0e99195476 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('bench_hash')
subdir('adminpack')
subdir('amcheck')
subdir('auth_delay')
--
2.43.0
v11-0004-Add-optimized-string-hashing-to-hashfn_unstable..patch (application/x-patch)
From 3dd1cdb0b322fdec0955c999bbffc8bf86e5a941 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 20 Dec 2023 11:40:11 +0700
Subject: [PATCH v11 4/8] Add optimized string hashing to hashfn_unstable.h
Given an already-initialized hash state and a C-string,
accumulate the hash of the string into the hash state
and return the length for the caller to save for the
finalizer. This avoids a strlen call.
If the string pointer is aligned, we can use a word-
at-a-time algorithm for the NUL check and for computing
the remainder length up to the NUL. This is only used on 64-bit,
since it's not worth the extra complexity for 32-bit platforms.
The big-endian case is simulated, and this will be rationalized
in a later commit.
Based on Jeff Davis's v10jd-0004, with
optimized tail inspired by NetBSD's strlen.
simulate big endian coding
---
src/backend/catalog/namespace.c | 30 ++++---
src/include/common/hashfn_unstable.h | 116 ++++++++++++++++++++++++++-
2 files changed, 133 insertions(+), 13 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 7fe2fd1fd4..32597bea20 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -247,25 +247,31 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const char *const start = key.searchPath;
- const char *buf = key.searchPath;
fasthash_state hs;
+ int sp_len;
/* WIP: maybe roleid should be mixed in normally */
- fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
- while (*buf)
- {
- int chunk_len = 0;
+ uint64 seed = key.roleid;
- while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
- chunk_len++;
+ // XXX not for commit
+#ifdef USE_ASSERT_CHECKING
- fasthash_accum(&hs, buf, chunk_len);
- buf += chunk_len;
- }
+ int blen = strlen(key.searchPath);
+
+ uint64 h_orig = fasthash64(key.searchPath, blen, key.roleid);
+
+ // Compare orig to optimized string interface
+ fasthash_init(&hs, blen, key.roleid);
+ (void) fasthash_accum_cstring(&hs, key.searchPath);
+ Assert(fasthash_final64(&hs, 0) == h_orig);
+#endif
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+
+ sp_len = fasthash_accum_cstring(&hs, key.searchPath);
/* pass the length to tweak the final mix */
- return fasthash_final32(&hs, buf - start);
+ return fasthash_final32(&hs, sp_len);
}
static inline bool
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index bf1dbee28d..4fc9edba6e 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -13,6 +13,8 @@ the same hashes between versions.
#ifndef HASHFN_UNSTABLE_H
#define HASHFN_UNSTABLE_H
+#include "port/pg_bitutils.h"
+
/*
* fasthash is a modification of code taken from
* https://code.google.com/archive/p/fast-hash/source/default/source
@@ -63,11 +65,12 @@ return fasthash_final32(&hs, <final length>);
typedef struct fasthash_state
{
uint64 accum;
-#define FH_SIZEOF_ACCUM sizeof(uint64)
uint64 hash;
} fasthash_state;
+#define FH_SIZEOF_ACCUM 8
+StaticAssertDecl(sizeof(((fasthash_state*) 0)->accum) == FH_SIZEOF_ACCUM, "wrong size for size macro");
#define FH_UNKNOWN_LENGTH 1
@@ -145,6 +148,117 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
fasthash_combine(hs);
}
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
+#define SIM_BE 1
+#include "port/pg_bswap.h"
+
+/*
+ * With an aligned pointer, we consume the string a word at a time. Loading
+ * the word containing the NUL terminator cannot segfault since page boundaries
+ * are MAXALIGN'd. For that last word, only use bytes up to the NUL for the hash.
+ * The algorithm was adopted from NetBSD's strlen.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = start;
+ int remainder;
+ uint64 zero_bytes;
+
+ Assert(PointerIsAligned(start, uint64));
+ while (true)
+ {
+ uint64 chunk = *(uint64 *)buf;
+#ifdef SIM_BE
+ uint64 low_bits = 0x7F7F7F7F7F7F7F7F;
+
+ chunk = pg_bswap64(chunk); /* simulate BE */
+
+ /*
+ * This expression has the useful property that all bytes in the result word
+ * that correspond to non-zero bytes in the original word have
+ * the value 0x00, while all bytes corresponding to zero bytes have
+ * the value 0x80.
+ */
+ zero_bytes = ~(((chunk & low_bits) + low_bits) | chunk | low_bits);
+#else
+ /*
+ * On little endian machines, we can use a slightly faster calculation,
+ * which sets bits in the first byte in the result word
+ * that corresponds to a zero byte in the original word.
+ * The rest of the bytes are indeterminate, so cannot be used
+ * on big-endian machines unless we resort to a bytewise check.
+ */
+ zero_bytes = haszero64(chunk);
+#endif
+ if (zero_bytes)
+ break;
+
+#ifdef SIM_BE
+ hs->accum = pg_bswap64(chunk); /* not needed with real BE, because we won't need the same answer */
+#else
+ hs->accum = chunk;
+#endif
+ fasthash_combine(hs);
+ buf += FH_SIZEOF_ACCUM;
+ }
+
+ /*
+ * Bytes with set bits will be 0x80, so
+ * calculate the first occurrence of a zero byte within the input word
+ * by counting the number of leading (on BE) or trailing (on LE)
+ * zeros and dividing the result by 8.
+ */
+#ifdef SIM_BE
+ remainder = (63 - pg_leftmost_one_pos64(zero_bytes)) / BITS_PER_BYTE;
+#else
+ remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
+#endif
+ fasthash_accum(hs, buf, remainder);
+ buf += remainder;
+
+ return buf - start;
+}
+
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = str;
+
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ return buf - start;
+}
+
+/*
+ * Accumulate the input into the hash state
+ * and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= FH_SIZEOF_ACCUM
+ if (PointerIsAligned(str, uint64))
+ return fasthash_accum_cstring_aligned(hs, str);
+ else
+#endif
+ return fasthash_accum_cstring_unaligned(hs, str);
+}
+
/*
* The finalizer
*
--
2.43.0
v11-0007-Add-bench-for-pgstat.patch (application/x-patch)
From 3a2bee493bb30f3c2f253ab27a715d3ca1262111 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 26 Dec 2023 12:18:22 +0700
Subject: [PATCH v11 7/8] Add bench for pgstat
---
contrib/bench_hash/bench_hash--1.0.sql | 9 ++++
contrib/bench_hash/bench_hash.c | 66 ++++++++++++++++++++++++++
2 files changed, 75 insertions(+)
diff --git a/contrib/bench_hash/bench_hash--1.0.sql b/contrib/bench_hash/bench_hash--1.0.sql
index b3a5747432..43ce946bf6 100644
--- a/contrib/bench_hash/bench_hash--1.0.sql
+++ b/contrib/bench_hash/bench_hash--1.0.sql
@@ -19,3 +19,12 @@ RETURNS int
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
+CREATE FUNCTION bench_pgstat_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_pgstat_hash_FH(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/contrib/bench_hash/bench_hash.c b/contrib/bench_hash/bench_hash.c
index 9c9dba93f0..017cf333ce 100644
--- a/contrib/bench_hash/bench_hash.c
+++ b/contrib/bench_hash/bench_hash.c
@@ -21,6 +21,7 @@ PG_MODULE_MAGIC;
#include "common/hashfn_unstable.h"
#include "miscadmin.h"
#include "utils/memutils.h"
+#include "utils/pgstat_internal.h"
PG_FUNCTION_INFO_V1(bench_string_hash);
@@ -101,3 +102,68 @@ bench_cstring_hash_aligned(PG_FUNCTION_ARGS)
PG_RETURN_INT32(hash);
}
+
+static inline uint32
+pgstat_hash_hash_key_orig(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+ uint32 hash;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ hash = murmurhash32(key->kind);
+ hash = hash_combine(hash, murmurhash32(key->dboid));
+ hash = hash_combine(hash, murmurhash32(key->objoid));
+
+ return hash;
+}
+
+static inline uint32
+pgstat_hash_hash_key_FH(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ return fasthash32((const char *) key, size, 0);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash);
+Datum
+bench_pgstat_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_orig((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash_fh);
+Datum
+bench_pgstat_hash_fh(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_FH((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
--
2.43.0
v11-0008-Optimize-loading-tail-when-4-bytes.patch (application/x-patch)
From 59c7f7933b7cf65222bda047f2cf915fdae8b5fd Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 26 Dec 2023 13:10:26 +0700
Subject: [PATCH v11 8/8] Optimize loading tail when >= 4 bytes
---
src/include/common/hashfn_unstable.h | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 5bc1fc88ec..79defbfeb5 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -113,6 +113,8 @@ fasthash_combine(fasthash_state *hs)
static inline void
fasthash_accum(fasthash_state *hs, const char *k, int len)
{
+ uint32 lower_four;
+
Assert(hs->accum == 0);
Assert(len <= FH_SIZEOF_ACCUM);
@@ -131,8 +133,9 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
hs->accum |= (uint64) k[4] << 32;
/* FALLTHROUGH */
case 4:
- hs->accum |= (uint64) k[3] << 24;
- /* FALLTHROUGH */
+ memcpy(&lower_four, k, sizeof(lower_four));
+ hs->accum |= lower_four;
+ break;
case 3:
hs->accum |= (uint64) k[2] << 16;
/* FALLTHROUGH */
--
2.43.0
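To see what this buys, here is a small standalone sketch (not part of the
patch set; the byte values are invented for illustration) contrasting the old
shift-and-OR tail with the single 4-byte load. The two agree on little-endian;
on big-endian the bytes land in a different order, which is tolerable for a
hash that makes no endianness promise:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
    const char  k[4] = {0x11, 0x22, 0x33, 0x44};
    uint64_t    accum_bytewise = 0;
    uint64_t    accum_load = 0;
    uint32_t    lower_four;

    /* the old shift-and-OR tail, as in v11-0001 */
    accum_bytewise |= (uint64_t) k[3] << 24;
    accum_bytewise |= (uint64_t) k[2] << 16;
    accum_bytewise |= (uint64_t) k[1] << 8;
    accum_bytewise |= (uint64_t) k[0];

    /* the single 4-byte load from this patch */
    memcpy(&lower_four, k, sizeof(lower_four));
    accum_load |= lower_four;

    /* prints 1 on little-endian hosts */
    printf("%d\n", accum_bytewise == accum_load);
    return 0;
}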
v11-0006-Try-simply-byte-swapping-on-BE-machines-and-then.patch (application/x-patch)
From 02875939fa45246140b34554c23eedccc66ba972 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 24 Dec 2023 15:14:46 +0700
Subject: [PATCH v11 6/8] Try simply byte-swapping on BE machines and then
handling like LE
---
src/include/common/hashfn_unstable.h | 36 +++++++---------------------
1 file changed, 9 insertions(+), 27 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 4fc9edba6e..5bc1fc88ec 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -14,6 +14,7 @@ the same hashes between versions.
#define HASHFN_UNSTABLE_H
#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
/*
* fasthash is a modification of code taken from
@@ -152,9 +153,6 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
-#define SIM_BE 1
-#include "port/pg_bswap.h"
-
/*
* With an aligned pointer, we consume the string a word at a time. Loading
* the word containing the NUL terminator cannot segfault since page boundaries
@@ -170,39 +168,27 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
uint64 zero_bytes;
Assert(PointerIsAligned(start, uint64));
- while (true)
+ for (;;)
{
uint64 chunk = *(uint64 *)buf;
-#ifdef SIM_BE
- uint64 low_bits = 0x7F7F7F7F7F7F7F7F;
- chunk = pg_bswap64(chunk); /* simulate BE */
+#ifdef WORDS_BIGENDIAN
+ /* switch to little endian, to make later calculations easier */
+ chunk = pg_bswap64(chunk);
+#endif
/*
- * This expression has the useful property that all bytes in the result word
- * that correspond to non-zero bytes in the original word have
- * the value 0x00, while all bytes corresponding to zero bytes have
- * the value 0x80.
- */
- zero_bytes = ~(((chunk & low_bits) + low_bits) | chunk | low_bits);
-#else
- /*
- * On little endian machines, we can use a slightly faster calculation,
+ * With little-endian representation, we can use this calculation,
* which sets bits in the first byte in the result word
* that corresponds to a zero byte in the original word.
* The rest of the bytes are indeterminate, so cannot be used
- * on big-endian machines unless we resort to a bytewise check.
+ * on big-endian machines without either swapping or a bytewise check.
*/
zero_bytes = haszero64(chunk);
-#endif
if (zero_bytes)
break;
-#ifdef SIM_BE
- hs->accum = pg_bswap64(chunk); /* not needed with real BE, because we won't need the same answer */
-#else
hs->accum = chunk;
-#endif
fasthash_combine(hs);
buf += FH_SIZEOF_ACCUM;
}
@@ -210,14 +196,10 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
/*
* Bytes with set bits will be 0x80, so
* calculate the first occurrence of a zero byte within the input word
- * by counting the number of leading (on BE) or trailing (on LE)
+ * by counting the number of trailing (for LE)
* zeros and dividing the result by 8.
*/
-#ifdef SIM_BE
- remainder = (63 - pg_leftmost_one_pos64(zero_bytes)) / BITS_PER_BYTE;
-#else
remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
-#endif
fasthash_accum(hs, buf, remainder);
buf += remainder;
--
2.43.0
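For reference, a minimal standalone sketch (not part of the patch set) of the
little-endian zero-byte detection this patch settles on; __builtin_ctzll
stands in for pg_rightmost_one_pos64, and the buffer contents are invented
for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* same zero-byte detector as haszero64 above */
#define haszero64(v) \
    (((v) - 0x0101010101010101ULL) & ~(v) & 0x8080808080808080ULL)

int
main(void)
{
    /* one 8-byte chunk of a string; the NUL terminator sits at byte 3 */
    const char  buf[8] = {'a', 'b', 'c', '\0', 'x', 'y', 'z', 'w'};
    uint64_t    chunk;
    uint64_t    zero_bytes;

    memcpy(&chunk, buf, sizeof(chunk));
    zero_bytes = haszero64(chunk);

    /*
     * On a little-endian host the lowest set bit marks the first zero
     * byte, so counting trailing zeros and dividing by 8 gives its
     * position.  Prints "NUL at byte 3".
     */
    printf("NUL at byte %d\n", (int) (__builtin_ctzll(zero_bytes) / 8));
    return 0;
}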
v11-0001-Add-inlineable-incremental-hash-functions-for-in.patch (application/x-patch)
From 291dc5818022d142b3b6cff5b503465f4acc5de9 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v11 1/8] Add inlineable, incremental hash functions for
in-memory use
A number of places hash NUL-terminated strings. Currently, we need
to call strlen first because hash_bytes needs the length. For short
strings the C library call has a large overhead, and strlen calls
show up prominently in profiles.
Per suggestion from Andres Freund, add hash functions with an
incremental interface. Instead of trying to whack around hash_bytes
while maintaining its current behavior on all platforms, we base
this work on fasthash (MIT licensed) which is simple, faster than
hash_bytes for inputs over 12 bytes long, and also passes the hash
function testing suite SMHasher.
The original functions have been reimplemented using our new
incremental interface to validate that this method will still give the
same answer, provided we have the input length ahead of time. Future
work will use these for some existing uses of simplehash and dynahash.
The new functionality lives in a new header hashfn_unstable.h. The
name implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being endian-independent.
Reviewed (in an earlier version) by Heikki Linnakangas
Discussion: https://www.postgresql.org/message-id/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
src/include/common/hashfn_unstable.h | 213 +++++++++++++++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
2 files changed, 214 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..bf1dbee28d
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,213 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be writen to disk and so must produce
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.c
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ * ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+ uint64 hash;
+} fasthash_state;
+
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit of additional mixing compared to
+ * just taking the lower 32 bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * The following trick converts the 64-bit hashcode to Fermat residue,
+ * which shall retain information from both the higher and lower parts of
+ * hashcode.
+ */
+ return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint32
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d659adbfd6..4038d07458 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3321,6 +3321,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.43.0
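To make the intended calling pattern concrete, here is a minimal sketch
(assuming the header above is applied; the function name is invented) of
hashing a NUL-terminated string without a strlen call. This is essentially
the loop that patch 0003 (next attachment) adds to spcachekey_hash:

#include "postgres.h"
#include "common/hashfn_unstable.h"

static inline uint32
hash_nul_terminated(const char *s)
{
    fasthash_state hs;
    const char *buf = s;

    fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);

    while (*buf)
    {
        int     chunk_len = 0;

        /* scan ahead at most one accumulator's worth of bytes */
        while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
            chunk_len++;

        fasthash_accum(&hs, buf, chunk_len);
        buf += chunk_len;
    }

    /* the caller-tracked length tweaks the final mix */
    return fasthash_final32(&hs, buf - s);
}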
v11-0003-Use-fasthash-for-the-search-path-cache.patch (application/x-patch)
From e825abb530f2edb8649d6d294ca501b082435eca Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v11 3/8] Use fasthash for the search path cache
This serves to demonstrate the incremental API, allowing inlined
hash calculation without a strlen call. This brings the general case
performance closer to the optimization done in commit a86c61c9ee.
WIP: roleid should be mixed in normally, unless we have
reason to just use it as a seed.
Jeff Davis, with switch to chunked interface by me
Discussion: https://www.postgresql.org/message-id/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
src/backend/catalog/namespace.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 5027efc91d..7fe2fd1fd4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -247,11 +247,25 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ const char *const start = key.searchPath;
+ const char *buf = key.searchPath;
+ fasthash_state hs;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ /* WIP: maybe roleid should be mixed in normally */
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, key.roleid);
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(&hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ /* pass the length to tweak the final mix */
+ return fasthash_final32(&hs, buf - start);
}
static inline bool
--
2.43.0
v11-0002-Use-fasthash-for-pgstat_hash_hash_key.patch (application/x-patch)
From 04afcddc051abc2727cd413e9000ecaba4b38037 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v11 2/8] Use fasthash for pgstat_hash_hash_key
Previously this called the 32-bit Murmur finalizer on the three elements,
then joined with hash_combine(). Fasthash is simpler, executes faster
and takes up less binary space. While the collision and bias behavior
were almost certainly fine with the previous coding, now we have
measurements to prove it.
Discussion:
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf9394b..ecc46bef04 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const char *) key, size, 0);
}
/*
--
2.43.0
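For a fixed-size key like this, no incremental state is needed at all; the
patch collapses the hash to a single fasthash32() call. A minimal sketch of
the same pattern (struct and function names invented for illustration,
mirroring PgStat_HashKey's three 4-byte fields):

#include "postgres.h"
#include "common/hashfn_unstable.h"

typedef struct DemoKey
{
    uint32      kind;
    Oid         dboid;
    Oid         objoid;
} DemoKey;

static inline uint32
demo_key_hash(const DemoKey *key)
{
    /*
     * This hashes the struct's raw bytes, so it relies on the key having
     * no uninitialized padding -- which holds for three 4-byte fields.
     */
    return fasthash32((const char *) key, sizeof(DemoKey), 0);
}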
On Tue, Dec 26, 2023 at 4:01 PM John Naylor <johncnaylorls@gmail.com> wrote:
0001-0003 are the same as earlier.
0004 takes Jeff's idea and adds in an optimization from NetBSD's
strlen (I said OpenBSD earlier, but it goes back further). I added
stub code to simulate big-endian when requested at compile time, but a
later patch removes it. Since it benched well, I made the extra effort
to generalize it for other callers. After adding to the hash state, it
returns the length so the caller can pass it to the finalizer.
0005 is the benchmark (not for commit) -- I took the parser keyword
list and added enough padding to make every string aligned when the
whole thing is copied to an alloc'd area.

Each of the bench_*.sql files named below is just running the
similarly-named function, all with the same argument, e.g. "select *
from bench_pgstat_hash_fh(100000);", so not attached.

Strings:

-- strlen + hash_bytes
pgbench -n -T 20 -f bench_hash_bytes.sql -M prepared | grep latency
latency average = 1036.732 ms

-- word-at-a-time hashing, with bytewise lookahead
pgbench -n -T 20 -f bench_cstr_unaligned.sql -M prepared | grep latency
latency average = 664.632 ms

-- word-at-a-time for both hashing and lookahead (Jeff's aligned
coding plus a technique from NetBSD strlen)
pgbench -n -T 20 -f bench_cstr_aligned.sql -M prepared | grep latency
latency average = 436.701 ms

So, the fully optimized aligned case is worth it if it's convenient.

0006 adds a byteswap for big-endian so we can reuse the little-endian
coding for the lookahead.

0007 - I also wanted to put numbers to 0002 (pgstat hash). While the
motivation for that was cleanup, I had a hunch it would shave cycles
and take up less binary space. It does on both accounts:

-- 3x murmur + hash_combine
pgbench -n -T 20 -f bench_pgstat_orig.sql -M prepared | grep latency
latency average = 333.540 ms

-- fasthash32 (simple call, no state setup and final needed for a single value)
pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency
latency average = 277.591 ms

0008 - We can optimize the tail load when it's 4 bytes -- to save
loads, shifts, and OR's. My compiler can't figure this out for the
pgstat hash, with its fixed 4-byte tail. It's pretty simple and should
help other cases:

pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency
latency average = 226.113 ms
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.c
@@ -0,0 +1,103 @@
+/*-------------------------------------------------------------------------
+ *
+ * bench_hash.c
+ *
+ * Copyright (c) 2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/bench_hash/bench_hash.c
+ *
+ *-------------------------------------------------------------------------
+ */
You added this module under contrib (root/contrib); your intention,
I guess, was to add it under root/src/test/modules.
Later I saw "0005 is the benchmark (not for commit)".
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,213 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be writen to disk and so must produce
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.c
+ */
+
Here it should be "src/include/common/hashfn_unstable.h". Also a typo: `writen`.
In pgbench, I use --no-vacuum --time=20 -M prepared.
My local computer is slow, but here are the test results:
select * from bench_cstring_hash_aligned(100000); 7318.893 ms
select * from bench_cstring_hash_unaligned(100000); 10383.173 ms
select * from bench_pgstat_hash(100000); 4474.989 ms
select * from bench_pgstat_hash_fh(100000); 9192.245 ms
select * from bench_string_hash(100000); 2048.008 ms
On Tue, Jan 2, 2024 at 6:56 AM jian he <jian.universality@gmail.com> wrote:
My local computer is slow, but here are the test results:
select * from bench_cstring_hash_aligned(100000); 7318.893 ms
select * from bench_cstring_hash_unaligned(100000); 10383.173 ms
select * from bench_pgstat_hash(100000); 4474.989 ms
select * from bench_pgstat_hash_fh(100000); 9192.245 ms
select * from bench_string_hash(100000); 2048.008 ms
This presents a 2x to 5x slowdown, so I'm skeptical this is typical --
what kind of platform is this? For starters, what CPU and compiler?
On Wed, Jan 3, 2024 at 10:12 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 2, 2024 at 6:56 AM jian he <jian.universality@gmail.com> wrote:
My local computer is slow, but here are the test results:
select * from bench_cstring_hash_aligned(100000); 7318.893 ms
select * from bench_cstring_hash_unaligned(100000); 10383.173 ms
select * from bench_pgstat_hash(100000); 4474.989 ms
select * from bench_pgstat_hash_fh(100000); 9192.245 ms
select * from bench_string_hash(100000); 2048.008 ms

This presents a 2x to 5x slowdown, so I'm skeptical this is typical --
what kind of platform is this? For starters, what CPU and compiler?
I still cannot git apply your patch cleanly, and I cannot find your
patch at http://cfbot.cputube.org/ (so it might be that I tested based
on incomplete information). But only hashfn_unstable.h influences
bench_hash/bench_hash.c, so I attached the whole patch that I had git
applied, that is, the changes I applied for the following tests.

This is how I test using pgbench:
pgbench --no-vacuum --time=20 --file
/home/jian/tmp/bench_cstring_hash_aligned.sql -M prepared | grep
latency
The following was tested on another machine; the machine spec is listed
below. I tested 3 times, and the results were very similar each time:
select * from bench_cstring_hash_aligned(100000); 4705.686 ms
select * from bench_cstring_hash_unaligned(100000); 6835.753 ms
select * from bench_pgstat_hash(100000); 2678.978 ms
select * from bench_pgstat_hash_fh(100000); 6199.017 ms
select * from bench_string_hash(100000); 847.699 ms
src6=# select version();
version
--------------------------------------------------------------------
PostgreSQL 17devel on x86_64-linux, compiled by gcc-11.4.0, 64-bit
(1 row)
jian@jian:~/Desktop/pg_src/src6/postgres$ gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
lscpu:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Address sizes: 46 bits physical, 48 bits virtual
Byte Order: Little Endian
CPU(s): 20
On-line CPU(s) list: 0-19
Vendor ID: GenuineIntel
Model name: Intel(R) Core(TM) i5-14600K
CPU family: 6
Model: 183
Thread(s) per core: 2
Core(s) per socket: 14
Socket(s): 1
Stepping: 1
CPU max MHz: 5300.0000
CPU min MHz: 800.0000
BogoMIPS: 6988.80
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep
mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon
pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2
ssse3 sdbg fma cx16 xtpr pdcm sse4_1 sse4_2 x2apic movbe popcnt
tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm
3dnowprefetch cpuid_fault ssbd ibrs ibpb stibp ibrs_enhanced
tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust
bmi1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushopt clwb
intel_pt sha_ni xsaveopt xsavec xgetbv1 xsaves split_lock_detect
avx_vnni dtherm ida arat pln pts hwp hwp_notify hwp_act_window
hwp_epp hwp_pkg_req hfi umip pku ospke waitpkg gfni vaes vpclmulqdq
tme rdpid movdiri movdir64b fsrm md_clear serialize pconfig
arch_lbr ibt flush_l1d arch_capabilities
Virtualization features:
Virtualization: VT-x
Caches (sum of all):
L1d: 544 KiB (14 instances)
L1i: 704 KiB (14 instances)
L2: 20 MiB (8 instances)
L3: 24 MiB (1 instance)
NUMA:
NUMA node(s): 1
NUMA node0 CPU(s): 0-19
Vulnerabilities:
Gather data sampling: Not affected
Itlb multihit: Not affected
L1tf: Not affected
Mds: Not affected
Meltdown: Not affected
Mmio stale data: Not affected
Retbleed: Not affected
Spec rstack overflow: Not affected
Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl
Spectre v1: Mitigation; usercopy/swapgs barriers and
__user pointer sanitization
Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional,
RSB filling, PBRSB-eIBRS SW sequence
Srbds: Not affected
Tsx async abort: Not affected
jian@jian:~/Desktop/pg_src/src6/postgres$ git log
commit bbbf8cd54a05ad5c92e79c96133f219e80fad77c (HEAD -> master)
Author: jian he <jian.universality@gmail.com>
Date: Thu Jan 4 10:32:39 2024 +0800
bench_hash contrib module
commit c5385929593dd8499cfb5d85ac322e8ee1819fd4
Author: Peter Eisentraut <peter@eisentraut.org>
Date: Fri Dec 29 18:01:53 2023 +0100
Make all Perl warnings fatal
Attachments:
v12-0001-bench_hash-contrib-module.patch (text/x-patch; charset=US-ASCII)
From bbbf8cd54a05ad5c92e79c96133f219e80fad77c Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Thu, 4 Jan 2024 10:32:39 +0800
Subject: [PATCH v12 1/1] bench_hash contrib module
---
.vscode/settings.json | 9 +
contrib/bench_hash/Makefile | 23 +
contrib/bench_hash/aligned_keywords.h | 991 +++++++++++++++++++++++++
contrib/bench_hash/bench_hash--1.0.sql | 30 +
contrib/bench_hash/bench_hash.c | 169 +++++
contrib/bench_hash/bench_hash.control | 5 +
contrib/bench_hash/meson.build | 19 +
contrib/meson.build | 1 +
src/backend/catalog/namespace.c | 28 +-
src/include/common/hashfn_unstable.h | 309 ++++++++
src/include/utils/pgstat_internal.h | 9 +-
src/tools/pgindent/typedefs.list | 1 +
12 files changed, 1583 insertions(+), 11 deletions(-)
create mode 100644 .vscode/settings.json
create mode 100644 contrib/bench_hash/Makefile
create mode 100644 contrib/bench_hash/aligned_keywords.h
create mode 100644 contrib/bench_hash/bench_hash--1.0.sql
create mode 100644 contrib/bench_hash/bench_hash.c
create mode 100644 contrib/bench_hash/bench_hash.control
create mode 100644 contrib/bench_hash/meson.build
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..c83feba3
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,9 @@
+{
+ "files.associations": {
+ "pg_bitutils.h": "c",
+ "hashfn_unstable.h": "c",
+ "hashfn.h": "c",
+ "memutils.h": "c",
+ "pgstat_internal.h": "c"
+ }
+}
\ No newline at end of file
diff --git a/contrib/bench_hash/Makefile b/contrib/bench_hash/Makefile
new file mode 100644
index 00000000..53270803
--- /dev/null
+++ b/contrib/bench_hash/Makefile
@@ -0,0 +1,23 @@
+# contrib/bench_hash/Makefile
+
+MODULE_big = bench_hash
+OBJS = \
+	$(WIN32RES) \
+	bench_hash.o
+PGFILEDESC = "bench_hash - benchmark some hash functions"
+
+EXTENSION = bench_hash
+DATA = bench_hash--1.0.sql
+
+# no regression tests; this module is for manual benchmarking
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/bench_hash
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/bench_hash/aligned_keywords.h b/contrib/bench_hash/aligned_keywords.h
new file mode 100644
index 00000000..c2bd67c8
--- /dev/null
+++ b/contrib/bench_hash/aligned_keywords.h
@@ -0,0 +1,991 @@
+/* created by copying from kwlist_d.h with this patch:
+
+--- a/src/tools/gen_keywordlist.pl
++++ b/src/tools/gen_keywordlist.pl
+@@ -97,7 +97,9 @@ while (<$kif>)
+ {
+ if (/^PG_KEYWORD\("(\w+)"/)
+ {
+- push @keywords, $1;
++ my $len = length($1) + 1;
++ my $aligned = $1 . "\\0" . "_" x ( ($len % 8) == 0 ? 0 : (8-($len % 8)) );
++ push @keywords, $aligned;
+ }
+ }
+
+@@ -127,7 +129,7 @@ for my $i (0 .. $#keywords - 1)
+ # Emit the string containing all the keywords.
+
+ printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
+-print $kwdef join qq|\\0"\n\t"|, @keywords;
++print $kwdef join qq|"\n\t"|, @keywords;
+ print $kwdef qq|";\n\n|;
+
+ # Emit an array of numerical offsets which will be used to index into the
+@@ -145,7 +147,7 @@ foreach my $name (@keywords)
+
+ # Calculate the cumulative offset of the next keyword,
+ # taking into account the null terminator.
+- $offset += $this_length + 1;
++ $offset += $this_length -1;
+
+ # Update max keyword length.
+ $max_len = $this_length if $max_len < $this_length;
+
+*/
+
+
+static const char aligned_words[] =
+ "abort\0__"
+ "absent\0_"
+ "absolute\0_______"
+ "access\0_"
+ "action\0_"
+ "add\0____"
+ "admin\0__"
+ "after\0__"
+ "aggregate\0______"
+ "all\0____"
+ "also\0___"
+ "alter\0__"
+ "always\0_"
+ "analyse\0"
+ "analyze\0"
+ "and\0____"
+ "any\0____"
+ "array\0__"
+ "as\0_____"
+ "asc\0____"
+ "asensitive\0_____"
+ "assertion\0______"
+ "assignment\0_____"
+ "asymmetric\0_____"
+ "at\0_____"
+ "atomic\0_"
+ "attach\0_"
+ "attribute\0______"
+ "authorization\0__"
+ "backward\0_______"
+ "before\0_"
+ "begin\0__"
+ "between\0"
+ "bigint\0_"
+ "binary\0_"
+ "bit\0____"
+ "boolean\0"
+ "both\0___"
+ "breadth\0"
+ "by\0_____"
+ "cache\0__"
+ "call\0___"
+ "called\0_"
+ "cascade\0"
+ "cascaded\0_______"
+ "case\0___"
+ "cast\0___"
+ "catalog\0"
+ "chain\0__"
+ "char\0___"
+ "character\0______"
+ "characteristics\0"
+ "check\0__"
+ "checkpoint\0_____"
+ "class\0__"
+ "close\0__"
+ "cluster\0"
+ "coalesce\0_______"
+ "collate\0"
+ "collation\0______"
+ "column\0_"
+ "columns\0"
+ "comment\0"
+ "comments\0_______"
+ "commit\0_"
+ "committed\0______"
+ "compression\0____"
+ "concurrently\0___"
+ "configuration\0__"
+ "conflict\0_______"
+ "connection\0_____"
+ "constraint\0_____"
+ "constraints\0____"
+ "content\0"
+ "continue\0_______"
+ "conversion\0_____"
+ "copy\0___"
+ "cost\0___"
+ "create\0_"
+ "cross\0__"
+ "csv\0____"
+ "cube\0___"
+ "current\0"
+ "current_catalog\0"
+ "current_date\0___"
+ "current_role\0___"
+ "current_schema\0_"
+ "current_time\0___"
+ "current_timestamp\0______"
+ "current_user\0___"
+ "cursor\0_"
+ "cycle\0__"
+ "data\0___"
+ "database\0_______"
+ "day\0____"
+ "deallocate\0_____"
+ "dec\0____"
+ "decimal\0"
+ "declare\0"
+ "default\0"
+ "defaults\0_______"
+ "deferrable\0_____"
+ "deferred\0_______"
+ "definer\0"
+ "delete\0_"
+ "delimiter\0______"
+ "delimiters\0_____"
+ "depends\0"
+ "depth\0__"
+ "desc\0___"
+ "detach\0_"
+ "dictionary\0_____"
+ "disable\0"
+ "discard\0"
+ "distinct\0_______"
+ "do\0_____"
+ "document\0_______"
+ "domain\0_"
+ "double\0_"
+ "drop\0___"
+ "each\0___"
+ "else\0___"
+ "enable\0_"
+ "encoding\0_______"
+ "encrypted\0______"
+ "end\0____"
+ "enum\0___"
+ "escape\0_"
+ "event\0__"
+ "except\0_"
+ "exclude\0"
+ "excluding\0______"
+ "exclusive\0______"
+ "execute\0"
+ "exists\0_"
+ "explain\0"
+ "expression\0_____"
+ "extension\0______"
+ "external\0_______"
+ "extract\0"
+ "false\0__"
+ "family\0_"
+ "fetch\0__"
+ "filter\0_"
+ "finalize\0_______"
+ "first\0__"
+ "float\0__"
+ "following\0______"
+ "for\0____"
+ "force\0__"
+ "foreign\0"
+ "format\0_"
+ "forward\0"
+ "freeze\0_"
+ "from\0___"
+ "full\0___"
+ "function\0_______"
+ "functions\0______"
+ "generated\0______"
+ "global\0_"
+ "grant\0__"
+ "granted\0"
+ "greatest\0_______"
+ "group\0__"
+ "grouping\0_______"
+ "groups\0_"
+ "handler\0"
+ "having\0_"
+ "header\0_"
+ "hold\0___"
+ "hour\0___"
+ "identity\0_______"
+ "if\0_____"
+ "ilike\0__"
+ "immediate\0______"
+ "immutable\0______"
+ "implicit\0_______"
+ "import\0_"
+ "in\0_____"
+ "include\0"
+ "including\0______"
+ "increment\0______"
+ "indent\0_"
+ "index\0__"
+ "indexes\0"
+ "inherit\0"
+ "inherits\0_______"
+ "initially\0______"
+ "inline\0_"
+ "inner\0__"
+ "inout\0__"
+ "input\0__"
+ "insensitive\0____"
+ "insert\0_"
+ "instead\0"
+ "int\0____"
+ "integer\0"
+ "intersect\0______"
+ "interval\0_______"
+ "into\0___"
+ "invoker\0"
+ "is\0_____"
+ "isnull\0_"
+ "isolation\0______"
+ "join\0___"
+ "json\0___"
+ "json_array\0_____"
+ "json_arrayagg\0__"
+ "json_object\0____"
+ "json_objectagg\0_"
+ "json_scalar\0____"
+ "json_serialize\0_"
+ "key\0____"
+ "keys\0___"
+ "label\0__"
+ "language\0_______"
+ "large\0__"
+ "last\0___"
+ "lateral\0"
+ "leading\0"
+ "leakproof\0______"
+ "least\0__"
+ "left\0___"
+ "level\0__"
+ "like\0___"
+ "limit\0__"
+ "listen\0_"
+ "load\0___"
+ "local\0__"
+ "localtime\0______"
+ "localtimestamp\0_"
+ "location\0_______"
+ "lock\0___"
+ "locked\0_"
+ "logged\0_"
+ "mapping\0"
+ "match\0__"
+ "matched\0"
+ "materialized\0___"
+ "maxvalue\0_______"
+ "merge\0__"
+ "method\0_"
+ "minute\0_"
+ "minvalue\0_______"
+ "mode\0___"
+ "month\0__"
+ "move\0___"
+ "name\0___"
+ "names\0__"
+ "national\0_______"
+ "natural\0"
+ "nchar\0__"
+ "new\0____"
+ "next\0___"
+ "nfc\0____"
+ "nfd\0____"
+ "nfkc\0___"
+ "nfkd\0___"
+ "no\0_____"
+ "none\0___"
+ "normalize\0______"
+ "normalized\0_____"
+ "not\0____"
+ "nothing\0"
+ "notify\0_"
+ "notnull\0"
+ "nowait\0_"
+ "null\0___"
+ "nullif\0_"
+ "nulls\0__"
+ "numeric\0"
+ "object\0_"
+ "of\0_____"
+ "off\0____"
+ "offset\0_"
+ "oids\0___"
+ "old\0____"
+ "on\0_____"
+ "only\0___"
+ "operator\0_______"
+ "option\0_"
+ "options\0"
+ "or\0_____"
+ "order\0__"
+ "ordinality\0_____"
+ "others\0_"
+ "out\0____"
+ "outer\0__"
+ "over\0___"
+ "overlaps\0_______"
+ "overlay\0"
+ "overriding\0_____"
+ "owned\0__"
+ "owner\0__"
+ "parallel\0_______"
+ "parameter\0______"
+ "parser\0_"
+ "partial\0"
+ "partition\0______"
+ "passing\0"
+ "password\0_______"
+ "placing\0"
+ "plans\0__"
+ "policy\0_"
+ "position\0_______"
+ "preceding\0______"
+ "precision\0______"
+ "prepare\0"
+ "prepared\0_______"
+ "preserve\0_______"
+ "primary\0"
+ "prior\0__"
+ "privileges\0_____"
+ "procedural\0_____"
+ "procedure\0______"
+ "procedures\0_____"
+ "program\0"
+ "publication\0____"
+ "quote\0__"
+ "range\0__"
+ "read\0___"
+ "real\0___"
+ "reassign\0_______"
+ "recheck\0"
+ "recursive\0______"
+ "ref\0____"
+ "references\0_____"
+ "referencing\0____"
+ "refresh\0"
+ "reindex\0"
+ "relative\0_______"
+ "release\0"
+ "rename\0_"
+ "repeatable\0_____"
+ "replace\0"
+ "replica\0"
+ "reset\0__"
+ "restart\0"
+ "restrict\0_______"
+ "return\0_"
+ "returning\0______"
+ "returns\0"
+ "revoke\0_"
+ "right\0__"
+ "role\0___"
+ "rollback\0_______"
+ "rollup\0_"
+ "routine\0"
+ "routines\0_______"
+ "row\0____"
+ "rows\0___"
+ "rule\0___"
+ "savepoint\0______"
+ "scalar\0_"
+ "schema\0_"
+ "schemas\0"
+ "scroll\0_"
+ "search\0_"
+ "second\0_"
+ "security\0_______"
+ "select\0_"
+ "sequence\0_______"
+ "sequences\0______"
+ "serializable\0___"
+ "server\0_"
+ "session\0"
+ "session_user\0___"
+ "set\0____"
+ "setof\0__"
+ "sets\0___"
+ "share\0__"
+ "show\0___"
+ "similar\0"
+ "simple\0_"
+ "skip\0___"
+ "smallint\0_______"
+ "snapshot\0_______"
+ "some\0___"
+ "sql\0____"
+ "stable\0_"
+ "standalone\0_____"
+ "start\0__"
+ "statement\0______"
+ "statistics\0_____"
+ "stdin\0__"
+ "stdout\0_"
+ "storage\0"
+ "stored\0_"
+ "strict\0_"
+ "strip\0__"
+ "subscription\0___"
+ "substring\0______"
+ "support\0"
+ "symmetric\0______"
+ "sysid\0__"
+ "system\0_"
+ "system_user\0____"
+ "table\0__"
+ "tables\0_"
+ "tablesample\0____"
+ "tablespace\0_____"
+ "temp\0___"
+ "template\0_______"
+ "temporary\0______"
+ "text\0___"
+ "then\0___"
+ "ties\0___"
+ "time\0___"
+ "timestamp\0______"
+ "to\0_____"
+ "trailing\0_______"
+ "transaction\0____"
+ "transform\0______"
+ "treat\0__"
+ "trigger\0"
+ "trim\0___"
+ "true\0___"
+ "truncate\0_______"
+ "trusted\0"
+ "type\0___"
+ "types\0__"
+ "uescape\0"
+ "unbounded\0______"
+ "uncommitted\0____"
+ "unencrypted\0____"
+ "union\0__"
+ "unique\0_"
+ "unknown\0"
+ "unlisten\0_______"
+ "unlogged\0_______"
+ "until\0__"
+ "update\0_"
+ "user\0___"
+ "using\0__"
+ "vacuum\0_"
+ "valid\0__"
+ "validate\0_______"
+ "validator\0______"
+ "value\0__"
+ "values\0_"
+ "varchar\0"
+ "variadic\0_______"
+ "varying\0"
+ "verbose\0"
+ "version\0"
+ "view\0___"
+ "views\0__"
+ "volatile\0_______"
+ "when\0___"
+ "where\0__"
+ "whitespace\0_____"
+ "window\0_"
+ "with\0___"
+ "within\0_"
+ "without\0"
+ "work\0___"
+ "wrapper\0"
+ "write\0__"
+ "xml\0____"
+ "xmlattributes\0__"
+ "xmlconcat\0______"
+ "xmlelement\0_____"
+ "xmlexists\0______"
+ "xmlforest\0______"
+ "xmlnamespaces\0__"
+ "xmlparse\0_______"
+ "xmlpi\0__"
+ "xmlroot\0"
+ "xmlserialize\0___"
+ "xmltable\0_______"
+ "year\0___"
+ "yes\0____"
+ "zone\0___";
+
+static const uint16 word_offsets[] = {
+ 0,
+ 8,
+ 16,
+ 32,
+ 40,
+ 48,
+ 56,
+ 64,
+ 72,
+ 88,
+ 96,
+ 104,
+ 112,
+ 120,
+ 128,
+ 136,
+ 144,
+ 152,
+ 160,
+ 168,
+ 176,
+ 192,
+ 208,
+ 224,
+ 240,
+ 248,
+ 256,
+ 264,
+ 280,
+ 296,
+ 312,
+ 320,
+ 328,
+ 336,
+ 344,
+ 352,
+ 360,
+ 368,
+ 376,
+ 384,
+ 392,
+ 400,
+ 408,
+ 416,
+ 424,
+ 440,
+ 448,
+ 456,
+ 464,
+ 472,
+ 480,
+ 496,
+ 512,
+ 520,
+ 536,
+ 544,
+ 552,
+ 560,
+ 576,
+ 584,
+ 600,
+ 608,
+ 616,
+ 624,
+ 640,
+ 648,
+ 664,
+ 680,
+ 696,
+ 712,
+ 728,
+ 744,
+ 760,
+ 776,
+ 784,
+ 800,
+ 816,
+ 824,
+ 832,
+ 840,
+ 848,
+ 856,
+ 864,
+ 872,
+ 888,
+ 904,
+ 920,
+ 936,
+ 952,
+ 976,
+ 992,
+ 1000,
+ 1008,
+ 1016,
+ 1032,
+ 1040,
+ 1056,
+ 1064,
+ 1072,
+ 1080,
+ 1088,
+ 1104,
+ 1120,
+ 1136,
+ 1144,
+ 1152,
+ 1168,
+ 1184,
+ 1192,
+ 1200,
+ 1208,
+ 1216,
+ 1232,
+ 1240,
+ 1248,
+ 1264,
+ 1272,
+ 1288,
+ 1296,
+ 1304,
+ 1312,
+ 1320,
+ 1328,
+ 1336,
+ 1352,
+ 1368,
+ 1376,
+ 1384,
+ 1392,
+ 1400,
+ 1408,
+ 1416,
+ 1432,
+ 1448,
+ 1456,
+ 1464,
+ 1472,
+ 1488,
+ 1504,
+ 1520,
+ 1528,
+ 1536,
+ 1544,
+ 1552,
+ 1560,
+ 1576,
+ 1584,
+ 1592,
+ 1608,
+ 1616,
+ 1624,
+ 1632,
+ 1640,
+ 1648,
+ 1656,
+ 1664,
+ 1672,
+ 1688,
+ 1704,
+ 1720,
+ 1728,
+ 1736,
+ 1744,
+ 1760,
+ 1768,
+ 1784,
+ 1792,
+ 1800,
+ 1808,
+ 1816,
+ 1824,
+ 1832,
+ 1848,
+ 1856,
+ 1864,
+ 1880,
+ 1896,
+ 1912,
+ 1920,
+ 1928,
+ 1936,
+ 1952,
+ 1968,
+ 1976,
+ 1984,
+ 1992,
+ 2000,
+ 2016,
+ 2032,
+ 2040,
+ 2048,
+ 2056,
+ 2064,
+ 2080,
+ 2088,
+ 2096,
+ 2104,
+ 2112,
+ 2128,
+ 2144,
+ 2152,
+ 2160,
+ 2168,
+ 2176,
+ 2192,
+ 2200,
+ 2208,
+ 2224,
+ 2240,
+ 2256,
+ 2272,
+ 2288,
+ 2304,
+ 2312,
+ 2320,
+ 2328,
+ 2344,
+ 2352,
+ 2360,
+ 2368,
+ 2376,
+ 2392,
+ 2400,
+ 2408,
+ 2416,
+ 2424,
+ 2432,
+ 2440,
+ 2448,
+ 2456,
+ 2472,
+ 2488,
+ 2504,
+ 2512,
+ 2520,
+ 2528,
+ 2536,
+ 2544,
+ 2552,
+ 2568,
+ 2584,
+ 2592,
+ 2600,
+ 2608,
+ 2624,
+ 2632,
+ 2640,
+ 2648,
+ 2656,
+ 2664,
+ 2680,
+ 2688,
+ 2696,
+ 2704,
+ 2712,
+ 2720,
+ 2728,
+ 2736,
+ 2744,
+ 2752,
+ 2760,
+ 2776,
+ 2792,
+ 2800,
+ 2808,
+ 2816,
+ 2824,
+ 2832,
+ 2840,
+ 2848,
+ 2856,
+ 2864,
+ 2872,
+ 2880,
+ 2888,
+ 2896,
+ 2904,
+ 2912,
+ 2920,
+ 2928,
+ 2944,
+ 2952,
+ 2960,
+ 2968,
+ 2976,
+ 2992,
+ 3000,
+ 3008,
+ 3016,
+ 3024,
+ 3040,
+ 3048,
+ 3064,
+ 3072,
+ 3080,
+ 3096,
+ 3112,
+ 3120,
+ 3128,
+ 3144,
+ 3152,
+ 3168,
+ 3176,
+ 3184,
+ 3192,
+ 3208,
+ 3224,
+ 3240,
+ 3248,
+ 3264,
+ 3280,
+ 3288,
+ 3296,
+ 3312,
+ 3328,
+ 3344,
+ 3360,
+ 3368,
+ 3384,
+ 3392,
+ 3400,
+ 3408,
+ 3416,
+ 3432,
+ 3440,
+ 3456,
+ 3464,
+ 3480,
+ 3496,
+ 3504,
+ 3512,
+ 3528,
+ 3536,
+ 3544,
+ 3560,
+ 3568,
+ 3576,
+ 3584,
+ 3592,
+ 3608,
+ 3616,
+ 3632,
+ 3640,
+ 3648,
+ 3656,
+ 3664,
+ 3680,
+ 3688,
+ 3696,
+ 3712,
+ 3720,
+ 3728,
+ 3736,
+ 3752,
+ 3760,
+ 3768,
+ 3776,
+ 3784,
+ 3792,
+ 3800,
+ 3816,
+ 3824,
+ 3840,
+ 3856,
+ 3872,
+ 3880,
+ 3888,
+ 3904,
+ 3912,
+ 3920,
+ 3928,
+ 3936,
+ 3944,
+ 3952,
+ 3960,
+ 3968,
+ 3984,
+ 4000,
+ 4008,
+ 4016,
+ 4024,
+ 4040,
+ 4048,
+ 4064,
+ 4080,
+ 4088,
+ 4096,
+ 4104,
+ 4112,
+ 4120,
+ 4128,
+ 4144,
+ 4160,
+ 4168,
+ 4184,
+ 4192,
+ 4200,
+ 4216,
+ 4224,
+ 4232,
+ 4248,
+ 4264,
+ 4272,
+ 4288,
+ 4304,
+ 4312,
+ 4320,
+ 4328,
+ 4336,
+ 4352,
+ 4360,
+ 4376,
+ 4392,
+ 4408,
+ 4416,
+ 4424,
+ 4432,
+ 4440,
+ 4456,
+ 4464,
+ 4472,
+ 4480,
+ 4488,
+ 4504,
+ 4520,
+ 4536,
+ 4544,
+ 4552,
+ 4560,
+ 4576,
+ 4592,
+ 4600,
+ 4608,
+ 4616,
+ 4624,
+ 4632,
+ 4640,
+ 4656,
+ 4672,
+ 4680,
+ 4688,
+ 4696,
+ 4712,
+ 4720,
+ 4728,
+ 4736,
+ 4744,
+ 4752,
+ 4768,
+ 4776,
+ 4784,
+ 4800,
+ 4808,
+ 4816,
+ 4824,
+ 4832,
+ 4840,
+ 4848,
+ 4856,
+ 4864,
+ 4880,
+ 4896,
+ 4912,
+ 4928,
+ 4944,
+ 4960,
+ 4976,
+ 4984,
+ 4992,
+ 5008,
+ 5024,
+ 5032,
+ 5040,
+};
+
+
+#define SCANKEYWORDS_NUM_KEYWORDS 473
+
diff --git a/contrib/bench_hash/bench_hash--1.0.sql b/contrib/bench_hash/bench_hash--1.0.sql
new file mode 100644
index 00000000..43ce946b
--- /dev/null
+++ b/contrib/bench_hash/bench_hash--1.0.sql
@@ -0,0 +1,30 @@
+/* src/test/modules/bench_hash/bench_hash--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bench_hash" to load this file. \quit
+
+CREATE FUNCTION bench_string_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+
+CREATE FUNCTION bench_cstring_hash_unaligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_cstring_hash_aligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_pgstat_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_pgstat_hash_FH(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/contrib/bench_hash/bench_hash.c b/contrib/bench_hash/bench_hash.c
new file mode 100644
index 00000000..017cf333
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.c
@@ -0,0 +1,169 @@
+/*-------------------------------------------------------------------------
+ *
+ * bench_hash.c
+ *
+ * Copyright (c) 2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/bench_hash/bench_hash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+#include "aligned_keywords.h"
+
+#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+#include "utils/pgstat_internal.h"
+
+
+PG_FUNCTION_INFO_V1(bench_string_hash);
+Datum
+bench_string_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len = strlen(&aligned_words[idx]);
+ hash += hash_bytes((const unsigned char *) &aligned_words[idx], s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_unaligned);
+Datum
+bench_cstring_hash_unaligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_unaligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_aligned);
+Datum
+bench_cstring_hash_aligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_aligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+static inline uint32
+pgstat_hash_hash_key_orig(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+ uint32 hash;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ hash = murmurhash32(key->kind);
+ hash = hash_combine(hash, murmurhash32(key->dboid));
+ hash = hash_combine(hash, murmurhash32(key->objoid));
+
+ return hash;
+}
+
+static inline uint32
+pgstat_hash_hash_key_FH(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ return fasthash32((const char *) key, size, 0);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash);
+Datum
+bench_pgstat_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_orig((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash_fh);
+Datum
+bench_pgstat_hash_fh(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_FH((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
diff --git a/contrib/bench_hash/bench_hash.control b/contrib/bench_hash/bench_hash.control
new file mode 100644
index 00000000..ffc63858
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.control
@@ -0,0 +1,5 @@
+# bench_hash extension
+comment = 'benchmark some hash functions'
+default_version = '1.0'
+module_pathname = '$libdir/bench_hash'
+relocatable = true
diff --git a/contrib/bench_hash/meson.build b/contrib/bench_hash/meson.build
new file mode 100644
index 00000000..f8d88d8b
--- /dev/null
+++ b/contrib/bench_hash/meson.build
@@ -0,0 +1,19 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+bench_hash_sources = files(
+ 'bench_hash.c',
+)
+
+bench_hash = shared_module('bench_hash',
+ bench_hash_sources,
+# link_with: pgport_srv,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += bench_hash
+
+install_data(
+ 'bench_hash.control',
+ 'bench_hash--1.0.sql',
+ kwargs: contrib_data_args,
+)
+
diff --git a/contrib/meson.build b/contrib/meson.build
index c0b267c6..0e991954 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('bench_hash')
subdir('adminpack')
subdir('amcheck')
subdir('auth_delay')
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 37a69e90..43b406c3 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -248,11 +248,31 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
+ fasthash_state hs;
+ int sp_len;
+
+ /* WIP: maybe roleid should be mixed in normally */
+ uint64 seed = key.roleid;
+
+ // XXX not for commit
+#ifdef USE_ASSERT_CHECKING
+
int blen = strlen(key.searchPath);
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ uint64 h_orig = fasthash64(key.searchPath, blen, key.roleid);
+
+ // Compare orig to optimized string interface
+ fasthash_init(&hs, blen, key.roleid);
+ (void) fasthash_accum_cstring(&hs, key.searchPath);
+ Assert(fasthash_final64(&hs, 0) == h_orig);
+#endif
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
+
+ sp_len = fasthash_accum_cstring(&hs, key.searchPath);
+
+ /* pass the length to tweak the final mix */
+ return fasthash_final32(&hs, sp_len);
}
static inline bool
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 00000000..5bc1fc88
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,309 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must produce
+the same hashes between versions.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ * ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+ uint64 accum;
+
+ uint64 hash;
+} fasthash_state;
+
+#define FH_SIZEOF_ACCUM 8
+StaticAssertDecl(sizeof(((fasthash_state*) 0)->accum) == FH_SIZEOF_ACCUM, "wrong size for size macro");
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ hs->accum |= (uint64) k[3] << 24;
+ /* FALLTHROUGH */
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
+/*
+ * With an aligned pointer, we consume the string a word at a time. Loading
+ * the word containing the NUL terminator cannot segfault since page boundaries
+ * are MAXALIGN'd. For that last word, only use bytes up to the NUL for the hash.
+ * The algorithm was adapted from NetBSD's strlen.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = start;
+ int remainder;
+ uint64 zero_bytes;
+
+ Assert(PointerIsAligned(start, uint64));
+ for (;;)
+ {
+ uint64 chunk = *(uint64 *)buf;
+
+#ifdef WORDS_BIGENDIAN
+ /* switch to little endian, to make later calculations easier */
+ chunk = pg_bswap64(chunk);
+#endif
+
+ /*
+ * With little-endian representation, we can use this calculation,
+ * which sets bits in the first byte in the result word
+ * that corresponds to a zero byte in the original word.
+ * The rest of the bytes are indeterminate, so cannot be used
+ * on big-endian machines without either swapping or a bytewise check.
+ */
+ zero_bytes = haszero64(chunk);
+ if (zero_bytes)
+ break;
+
+ hs->accum = chunk;
+ fasthash_combine(hs);
+ buf += FH_SIZEOF_ACCUM;
+ }
+
+ /*
+ * Bytes with set bits will be 0x80, so
+ * calculate the first occurrence of a zero byte within the input word
+ * by counting the number of trailing (for LE)
+ * zeros and dividing the result by 8.
+ */
+ remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
+ fasthash_accum(hs, buf, remainder);
+ buf += remainder;
+
+ return buf - start;
+}
+
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = str;
+
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ return buf - start;
+}
+
+/*
+ * Accumulate the input into the hash state
+ * and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= FH_SIZEOF_ACCUM
+ if (PointerIsAligned(str, uint64))
+ return fasthash_accum_cstring_aligned(hs, str);
+ else
+#endif
+ return fasthash_accum_cstring_unaligned(hs, str);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit more mixing compared to
+ * just taking the lower 32 bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * The following trick converts the 64-bit hashcode to Fermat residue,
+ * which shall retain information from both the higher and lower parts of
+ * hashcode.
+ */
+ return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint32
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 60fbf939..ecc46bef 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const char *) key, size, 0);
}
/*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e37ef9aa..f513d7fb 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3326,6 +3326,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.34.1
On Thu, Jan 4, 2024 at 10:01 AM jian he <jian.universality@gmail.com> wrote:
> I still cannot git apply your patch cleanly. in

I don't know why you're using that -- the git apply man page even says
"Use git-am(1) to create commits from patches generated by
git-format-patch(1) and/or received by email."

Or, if that fails, use "patch".

> http://cfbot.cputube.org/ I cannot find your patch.
> (So, it might be that I tested based on incomplete information.)
> But only hashfn_unstable.h influences bench_hash/bench_hash.c, so I
> attached the whole patch that I had git applied, that is, the changes
> I applied for the following tests.

Well, aside from the added text-editor detritus, it looks like this
has everything except v11-0008, without which I still get improvement
for the pgstat hash.

> Model name: Intel(R) Core(TM) i5-14600K
> The following was tested with another machine; its spec is also listed below.
> I tested 3 times; the results are very similar, as follows:
> select * from bench_cstring_hash_aligned(100000); 4705.686 ms
> select * from bench_cstring_hash_unaligned(100000); 6835.753 ms
> select * from bench_pgstat_hash(100000); 2678.978 ms
> select * from bench_pgstat_hash_fh(100000); 6199.017 ms
> select * from bench_string_hash(100000); 847.699 ms

I was fully prepared to believe something like 32-bit Arm would have
difficulty with 64-bit shifts/multiplies etc., but this makes no sense
at all. In this test, on my machine, HEAD's pgstat_hash is 3x faster
than HEAD's "strlen + hash_bytes", but for you it's 3x slower. To
improve reproducibility, I've added the .sql files and a bench script
to v13. I invite you to run bench_hash.sh and see if that changes
anything.
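
(For anyone reproducing this: the attached runbench.sh expects a running
server where CREATE EXTENSION bench_hash has already been done, and then
times each of the attached bench_*.sql files with "pgbench -M prepared",
grepping for the latency line.)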

v13 also:
- adds an assert that aligned and unaligned C string calculations give the same result
- properly mixes roleid in the namespace hash, since it's now convenient to do so (0005 is an alternate method; see the sketch after this list)
- removes the broken makefile from the benchmark (not for commit anyway)
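
To make the difference concrete, here is the body of spcachekey_hash()
under each approach, condensed from the attached 0004 and 0005 patches
(hs is a fasthash_state, key is the SearchPathCacheKey; declarations
omitted):

    /* 0004: feed roleid through the generic byte accumulator */
    fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
    fasthash_accum(&hs, (const char *) &key.roleid, sizeof(Oid));
    sp_len = fasthash_accum_cstring(&hs, key.searchPath);
    return fasthash_final32(&hs, sp_len);

    /* 0005: assign the member's value straight into the accumulator,
     * then combine -- per its commit message, a safer way to
     * accumulate a single struct member into the hash state */
    fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
    hs.accum = key.roleid;
    fasthash_combine(&hs);
    sp_len = fasthash_accum_cstring(&hs, key.searchPath);
    return fasthash_final32(&hs, sp_len);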
Attachments:
v13-0005-WIP-a-safer-way-to-accumulate-a-single-struct-me.patchtext/x-patch; charset=US-ASCII; name=v13-0005-WIP-a-safer-way-to-accumulate-a-single-struct-me.patchDownload
From cf64f9a0603837dd89efdf1aa455395906e75ded Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Fri, 5 Jan 2024 17:21:53 +0700
Subject: [PATCH v13 5/6] WIP: a safer way to accumulate a single struct member
into the hash state
---
src/backend/catalog/namespace.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index d1eae2a2d4..83fd57906c 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -258,7 +258,9 @@ spcachekey_hash(SearchPathCacheKey key)
fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
- fasthash_accum(&hs, (const char*) &key.roleid, sizeof(Oid));
+ hs.accum = key.roleid;
+ fasthash_combine(&hs);
+
sp_len = fasthash_accum_cstring(&hs, key.searchPath);
/* pass the length to tweak the final mix */
--
2.43.0
v13-0006-Add-benchmarks-for-hashing.patchtext/x-patch; charset=US-ASCII; name=v13-0006-Add-benchmarks-for-hashing.patchDownload
From 3ff66ebbe9f27639984c726dbd4005002b2615b9 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 24 Dec 2023 09:46:44 +0700
Subject: [PATCH v13 6/6] Add benchmarks for hashing
---
bench_cstr_aligned.sql | 1 +
bench_cstr_unaligned.sql | 1 +
bench_pgstat_fh.sql | 2 +
bench_pgstat_orig.sql | 1 +
bench_string_hash.sql | 2 +
contrib/bench_hash/aligned_keywords.h | 991 +++++++++++++++++++++++++
contrib/bench_hash/bench_hash--1.0.sql | 30 +
contrib/bench_hash/bench_hash.c | 169 +++++
contrib/bench_hash/bench_hash.control | 5 +
contrib/bench_hash/meson.build | 19 +
contrib/meson.build | 1 +
runbench.sh | 16 +
12 files changed, 1238 insertions(+)
create mode 100644 bench_cstr_aligned.sql
create mode 100644 bench_cstr_unaligned.sql
create mode 100644 bench_pgstat_fh.sql
create mode 100644 bench_pgstat_orig.sql
create mode 100644 bench_string_hash.sql
create mode 100644 contrib/bench_hash/aligned_keywords.h
create mode 100644 contrib/bench_hash/bench_hash--1.0.sql
create mode 100644 contrib/bench_hash/bench_hash.c
create mode 100644 contrib/bench_hash/bench_hash.control
create mode 100644 contrib/bench_hash/meson.build
create mode 100755 runbench.sh
diff --git a/bench_cstr_aligned.sql b/bench_cstr_aligned.sql
new file mode 100644
index 0000000000..9ce6074fe2
--- /dev/null
+++ b/bench_cstr_aligned.sql
@@ -0,0 +1 @@
+select * from bench_cstring_hash_aligned(100000);
diff --git a/bench_cstr_unaligned.sql b/bench_cstr_unaligned.sql
new file mode 100644
index 0000000000..d654be3c07
--- /dev/null
+++ b/bench_cstr_unaligned.sql
@@ -0,0 +1 @@
+select * from bench_cstring_hash_unaligned(100000);
diff --git a/bench_pgstat_fh.sql b/bench_pgstat_fh.sql
new file mode 100644
index 0000000000..1130361c43
--- /dev/null
+++ b/bench_pgstat_fh.sql
@@ -0,0 +1,2 @@
+select * from bench_pgstat_hash_fh(100000);
+
diff --git a/bench_pgstat_orig.sql b/bench_pgstat_orig.sql
new file mode 100644
index 0000000000..bd6d084fc2
--- /dev/null
+++ b/bench_pgstat_orig.sql
@@ -0,0 +1 @@
+select * from bench_pgstat_hash(100000);
diff --git a/bench_string_hash.sql b/bench_string_hash.sql
new file mode 100644
index 0000000000..fad5a41811
--- /dev/null
+++ b/bench_string_hash.sql
@@ -0,0 +1,2 @@
+select * from bench_string_hash(100000);
+
diff --git a/contrib/bench_hash/aligned_keywords.h b/contrib/bench_hash/aligned_keywords.h
new file mode 100644
index 0000000000..c2bd67c856
--- /dev/null
+++ b/contrib/bench_hash/aligned_keywords.h
@@ -0,0 +1,991 @@
+/* created by copying from kwlist_d.h with this patch:
+
+--- a/src/tools/gen_keywordlist.pl
++++ b/src/tools/gen_keywordlist.pl
+@@ -97,7 +97,9 @@ while (<$kif>)
+ {
+ if (/^PG_KEYWORD\("(\w+)"/)
+ {
+- push @keywords, $1;
++ my $len = length($1) + 1;
++ my $aligned = $1 . "\\0" . "_" x ( ($len % 8) == 0 ? 0 : (8-($len % 8)) );
++ push @keywords, $aligned;
+ }
+ }
+
+@@ -127,7 +129,7 @@ for my $i (0 .. $#keywords - 1)
+ # Emit the string containing all the keywords.
+
+ printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname;
+-print $kwdef join qq|\\0"\n\t"|, @keywords;
++print $kwdef join qq|"\n\t"|, @keywords;
+ print $kwdef qq|";\n\n|;
+
+ # Emit an array of numerical offsets which will be used to index into the
+@@ -145,7 +147,7 @@ foreach my $name (@keywords)
+
+ # Calculate the cumulative offset of the next keyword,
+ # taking into account the null terminator.
+- $offset += $this_length + 1;
++ $offset += $this_length -1;
+
+ # Update max keyword length.
+ $max_len = $this_length if $max_len < $this_length;
+
+*/
+
+
+static const char aligned_words[] =
+ "abort\0__"
+ "absent\0_"
+ "absolute\0_______"
+ "access\0_"
+ "action\0_"
+ "add\0____"
+ "admin\0__"
+ "after\0__"
+ "aggregate\0______"
+ "all\0____"
+ "also\0___"
+ "alter\0__"
+ "always\0_"
+ "analyse\0"
+ "analyze\0"
+ "and\0____"
+ "any\0____"
+ "array\0__"
+ "as\0_____"
+ "asc\0____"
+ "asensitive\0_____"
+ "assertion\0______"
+ "assignment\0_____"
+ "asymmetric\0_____"
+ "at\0_____"
+ "atomic\0_"
+ "attach\0_"
+ "attribute\0______"
+ "authorization\0__"
+ "backward\0_______"
+ "before\0_"
+ "begin\0__"
+ "between\0"
+ "bigint\0_"
+ "binary\0_"
+ "bit\0____"
+ "boolean\0"
+ "both\0___"
+ "breadth\0"
+ "by\0_____"
+ "cache\0__"
+ "call\0___"
+ "called\0_"
+ "cascade\0"
+ "cascaded\0_______"
+ "case\0___"
+ "cast\0___"
+ "catalog\0"
+ "chain\0__"
+ "char\0___"
+ "character\0______"
+ "characteristics\0"
+ "check\0__"
+ "checkpoint\0_____"
+ "class\0__"
+ "close\0__"
+ "cluster\0"
+ "coalesce\0_______"
+ "collate\0"
+ "collation\0______"
+ "column\0_"
+ "columns\0"
+ "comment\0"
+ "comments\0_______"
+ "commit\0_"
+ "committed\0______"
+ "compression\0____"
+ "concurrently\0___"
+ "configuration\0__"
+ "conflict\0_______"
+ "connection\0_____"
+ "constraint\0_____"
+ "constraints\0____"
+ "content\0"
+ "continue\0_______"
+ "conversion\0_____"
+ "copy\0___"
+ "cost\0___"
+ "create\0_"
+ "cross\0__"
+ "csv\0____"
+ "cube\0___"
+ "current\0"
+ "current_catalog\0"
+ "current_date\0___"
+ "current_role\0___"
+ "current_schema\0_"
+ "current_time\0___"
+ "current_timestamp\0______"
+ "current_user\0___"
+ "cursor\0_"
+ "cycle\0__"
+ "data\0___"
+ "database\0_______"
+ "day\0____"
+ "deallocate\0_____"
+ "dec\0____"
+ "decimal\0"
+ "declare\0"
+ "default\0"
+ "defaults\0_______"
+ "deferrable\0_____"
+ "deferred\0_______"
+ "definer\0"
+ "delete\0_"
+ "delimiter\0______"
+ "delimiters\0_____"
+ "depends\0"
+ "depth\0__"
+ "desc\0___"
+ "detach\0_"
+ "dictionary\0_____"
+ "disable\0"
+ "discard\0"
+ "distinct\0_______"
+ "do\0_____"
+ "document\0_______"
+ "domain\0_"
+ "double\0_"
+ "drop\0___"
+ "each\0___"
+ "else\0___"
+ "enable\0_"
+ "encoding\0_______"
+ "encrypted\0______"
+ "end\0____"
+ "enum\0___"
+ "escape\0_"
+ "event\0__"
+ "except\0_"
+ "exclude\0"
+ "excluding\0______"
+ "exclusive\0______"
+ "execute\0"
+ "exists\0_"
+ "explain\0"
+ "expression\0_____"
+ "extension\0______"
+ "external\0_______"
+ "extract\0"
+ "false\0__"
+ "family\0_"
+ "fetch\0__"
+ "filter\0_"
+ "finalize\0_______"
+ "first\0__"
+ "float\0__"
+ "following\0______"
+ "for\0____"
+ "force\0__"
+ "foreign\0"
+ "format\0_"
+ "forward\0"
+ "freeze\0_"
+ "from\0___"
+ "full\0___"
+ "function\0_______"
+ "functions\0______"
+ "generated\0______"
+ "global\0_"
+ "grant\0__"
+ "granted\0"
+ "greatest\0_______"
+ "group\0__"
+ "grouping\0_______"
+ "groups\0_"
+ "handler\0"
+ "having\0_"
+ "header\0_"
+ "hold\0___"
+ "hour\0___"
+ "identity\0_______"
+ "if\0_____"
+ "ilike\0__"
+ "immediate\0______"
+ "immutable\0______"
+ "implicit\0_______"
+ "import\0_"
+ "in\0_____"
+ "include\0"
+ "including\0______"
+ "increment\0______"
+ "indent\0_"
+ "index\0__"
+ "indexes\0"
+ "inherit\0"
+ "inherits\0_______"
+ "initially\0______"
+ "inline\0_"
+ "inner\0__"
+ "inout\0__"
+ "input\0__"
+ "insensitive\0____"
+ "insert\0_"
+ "instead\0"
+ "int\0____"
+ "integer\0"
+ "intersect\0______"
+ "interval\0_______"
+ "into\0___"
+ "invoker\0"
+ "is\0_____"
+ "isnull\0_"
+ "isolation\0______"
+ "join\0___"
+ "json\0___"
+ "json_array\0_____"
+ "json_arrayagg\0__"
+ "json_object\0____"
+ "json_objectagg\0_"
+ "json_scalar\0____"
+ "json_serialize\0_"
+ "key\0____"
+ "keys\0___"
+ "label\0__"
+ "language\0_______"
+ "large\0__"
+ "last\0___"
+ "lateral\0"
+ "leading\0"
+ "leakproof\0______"
+ "least\0__"
+ "left\0___"
+ "level\0__"
+ "like\0___"
+ "limit\0__"
+ "listen\0_"
+ "load\0___"
+ "local\0__"
+ "localtime\0______"
+ "localtimestamp\0_"
+ "location\0_______"
+ "lock\0___"
+ "locked\0_"
+ "logged\0_"
+ "mapping\0"
+ "match\0__"
+ "matched\0"
+ "materialized\0___"
+ "maxvalue\0_______"
+ "merge\0__"
+ "method\0_"
+ "minute\0_"
+ "minvalue\0_______"
+ "mode\0___"
+ "month\0__"
+ "move\0___"
+ "name\0___"
+ "names\0__"
+ "national\0_______"
+ "natural\0"
+ "nchar\0__"
+ "new\0____"
+ "next\0___"
+ "nfc\0____"
+ "nfd\0____"
+ "nfkc\0___"
+ "nfkd\0___"
+ "no\0_____"
+ "none\0___"
+ "normalize\0______"
+ "normalized\0_____"
+ "not\0____"
+ "nothing\0"
+ "notify\0_"
+ "notnull\0"
+ "nowait\0_"
+ "null\0___"
+ "nullif\0_"
+ "nulls\0__"
+ "numeric\0"
+ "object\0_"
+ "of\0_____"
+ "off\0____"
+ "offset\0_"
+ "oids\0___"
+ "old\0____"
+ "on\0_____"
+ "only\0___"
+ "operator\0_______"
+ "option\0_"
+ "options\0"
+ "or\0_____"
+ "order\0__"
+ "ordinality\0_____"
+ "others\0_"
+ "out\0____"
+ "outer\0__"
+ "over\0___"
+ "overlaps\0_______"
+ "overlay\0"
+ "overriding\0_____"
+ "owned\0__"
+ "owner\0__"
+ "parallel\0_______"
+ "parameter\0______"
+ "parser\0_"
+ "partial\0"
+ "partition\0______"
+ "passing\0"
+ "password\0_______"
+ "placing\0"
+ "plans\0__"
+ "policy\0_"
+ "position\0_______"
+ "preceding\0______"
+ "precision\0______"
+ "prepare\0"
+ "prepared\0_______"
+ "preserve\0_______"
+ "primary\0"
+ "prior\0__"
+ "privileges\0_____"
+ "procedural\0_____"
+ "procedure\0______"
+ "procedures\0_____"
+ "program\0"
+ "publication\0____"
+ "quote\0__"
+ "range\0__"
+ "read\0___"
+ "real\0___"
+ "reassign\0_______"
+ "recheck\0"
+ "recursive\0______"
+ "ref\0____"
+ "references\0_____"
+ "referencing\0____"
+ "refresh\0"
+ "reindex\0"
+ "relative\0_______"
+ "release\0"
+ "rename\0_"
+ "repeatable\0_____"
+ "replace\0"
+ "replica\0"
+ "reset\0__"
+ "restart\0"
+ "restrict\0_______"
+ "return\0_"
+ "returning\0______"
+ "returns\0"
+ "revoke\0_"
+ "right\0__"
+ "role\0___"
+ "rollback\0_______"
+ "rollup\0_"
+ "routine\0"
+ "routines\0_______"
+ "row\0____"
+ "rows\0___"
+ "rule\0___"
+ "savepoint\0______"
+ "scalar\0_"
+ "schema\0_"
+ "schemas\0"
+ "scroll\0_"
+ "search\0_"
+ "second\0_"
+ "security\0_______"
+ "select\0_"
+ "sequence\0_______"
+ "sequences\0______"
+ "serializable\0___"
+ "server\0_"
+ "session\0"
+ "session_user\0___"
+ "set\0____"
+ "setof\0__"
+ "sets\0___"
+ "share\0__"
+ "show\0___"
+ "similar\0"
+ "simple\0_"
+ "skip\0___"
+ "smallint\0_______"
+ "snapshot\0_______"
+ "some\0___"
+ "sql\0____"
+ "stable\0_"
+ "standalone\0_____"
+ "start\0__"
+ "statement\0______"
+ "statistics\0_____"
+ "stdin\0__"
+ "stdout\0_"
+ "storage\0"
+ "stored\0_"
+ "strict\0_"
+ "strip\0__"
+ "subscription\0___"
+ "substring\0______"
+ "support\0"
+ "symmetric\0______"
+ "sysid\0__"
+ "system\0_"
+ "system_user\0____"
+ "table\0__"
+ "tables\0_"
+ "tablesample\0____"
+ "tablespace\0_____"
+ "temp\0___"
+ "template\0_______"
+ "temporary\0______"
+ "text\0___"
+ "then\0___"
+ "ties\0___"
+ "time\0___"
+ "timestamp\0______"
+ "to\0_____"
+ "trailing\0_______"
+ "transaction\0____"
+ "transform\0______"
+ "treat\0__"
+ "trigger\0"
+ "trim\0___"
+ "true\0___"
+ "truncate\0_______"
+ "trusted\0"
+ "type\0___"
+ "types\0__"
+ "uescape\0"
+ "unbounded\0______"
+ "uncommitted\0____"
+ "unencrypted\0____"
+ "union\0__"
+ "unique\0_"
+ "unknown\0"
+ "unlisten\0_______"
+ "unlogged\0_______"
+ "until\0__"
+ "update\0_"
+ "user\0___"
+ "using\0__"
+ "vacuum\0_"
+ "valid\0__"
+ "validate\0_______"
+ "validator\0______"
+ "value\0__"
+ "values\0_"
+ "varchar\0"
+ "variadic\0_______"
+ "varying\0"
+ "verbose\0"
+ "version\0"
+ "view\0___"
+ "views\0__"
+ "volatile\0_______"
+ "when\0___"
+ "where\0__"
+ "whitespace\0_____"
+ "window\0_"
+ "with\0___"
+ "within\0_"
+ "without\0"
+ "work\0___"
+ "wrapper\0"
+ "write\0__"
+ "xml\0____"
+ "xmlattributes\0__"
+ "xmlconcat\0______"
+ "xmlelement\0_____"
+ "xmlexists\0______"
+ "xmlforest\0______"
+ "xmlnamespaces\0__"
+ "xmlparse\0_______"
+ "xmlpi\0__"
+ "xmlroot\0"
+ "xmlserialize\0___"
+ "xmltable\0_______"
+ "year\0___"
+ "yes\0____"
+ "zone\0___";
+
+static const uint16 word_offsets[] = {
+ 0,
+ 8,
+ 16,
+ 32,
+ 40,
+ 48,
+ 56,
+ 64,
+ 72,
+ 88,
+ 96,
+ 104,
+ 112,
+ 120,
+ 128,
+ 136,
+ 144,
+ 152,
+ 160,
+ 168,
+ 176,
+ 192,
+ 208,
+ 224,
+ 240,
+ 248,
+ 256,
+ 264,
+ 280,
+ 296,
+ 312,
+ 320,
+ 328,
+ 336,
+ 344,
+ 352,
+ 360,
+ 368,
+ 376,
+ 384,
+ 392,
+ 400,
+ 408,
+ 416,
+ 424,
+ 440,
+ 448,
+ 456,
+ 464,
+ 472,
+ 480,
+ 496,
+ 512,
+ 520,
+ 536,
+ 544,
+ 552,
+ 560,
+ 576,
+ 584,
+ 600,
+ 608,
+ 616,
+ 624,
+ 640,
+ 648,
+ 664,
+ 680,
+ 696,
+ 712,
+ 728,
+ 744,
+ 760,
+ 776,
+ 784,
+ 800,
+ 816,
+ 824,
+ 832,
+ 840,
+ 848,
+ 856,
+ 864,
+ 872,
+ 888,
+ 904,
+ 920,
+ 936,
+ 952,
+ 976,
+ 992,
+ 1000,
+ 1008,
+ 1016,
+ 1032,
+ 1040,
+ 1056,
+ 1064,
+ 1072,
+ 1080,
+ 1088,
+ 1104,
+ 1120,
+ 1136,
+ 1144,
+ 1152,
+ 1168,
+ 1184,
+ 1192,
+ 1200,
+ 1208,
+ 1216,
+ 1232,
+ 1240,
+ 1248,
+ 1264,
+ 1272,
+ 1288,
+ 1296,
+ 1304,
+ 1312,
+ 1320,
+ 1328,
+ 1336,
+ 1352,
+ 1368,
+ 1376,
+ 1384,
+ 1392,
+ 1400,
+ 1408,
+ 1416,
+ 1432,
+ 1448,
+ 1456,
+ 1464,
+ 1472,
+ 1488,
+ 1504,
+ 1520,
+ 1528,
+ 1536,
+ 1544,
+ 1552,
+ 1560,
+ 1576,
+ 1584,
+ 1592,
+ 1608,
+ 1616,
+ 1624,
+ 1632,
+ 1640,
+ 1648,
+ 1656,
+ 1664,
+ 1672,
+ 1688,
+ 1704,
+ 1720,
+ 1728,
+ 1736,
+ 1744,
+ 1760,
+ 1768,
+ 1784,
+ 1792,
+ 1800,
+ 1808,
+ 1816,
+ 1824,
+ 1832,
+ 1848,
+ 1856,
+ 1864,
+ 1880,
+ 1896,
+ 1912,
+ 1920,
+ 1928,
+ 1936,
+ 1952,
+ 1968,
+ 1976,
+ 1984,
+ 1992,
+ 2000,
+ 2016,
+ 2032,
+ 2040,
+ 2048,
+ 2056,
+ 2064,
+ 2080,
+ 2088,
+ 2096,
+ 2104,
+ 2112,
+ 2128,
+ 2144,
+ 2152,
+ 2160,
+ 2168,
+ 2176,
+ 2192,
+ 2200,
+ 2208,
+ 2224,
+ 2240,
+ 2256,
+ 2272,
+ 2288,
+ 2304,
+ 2312,
+ 2320,
+ 2328,
+ 2344,
+ 2352,
+ 2360,
+ 2368,
+ 2376,
+ 2392,
+ 2400,
+ 2408,
+ 2416,
+ 2424,
+ 2432,
+ 2440,
+ 2448,
+ 2456,
+ 2472,
+ 2488,
+ 2504,
+ 2512,
+ 2520,
+ 2528,
+ 2536,
+ 2544,
+ 2552,
+ 2568,
+ 2584,
+ 2592,
+ 2600,
+ 2608,
+ 2624,
+ 2632,
+ 2640,
+ 2648,
+ 2656,
+ 2664,
+ 2680,
+ 2688,
+ 2696,
+ 2704,
+ 2712,
+ 2720,
+ 2728,
+ 2736,
+ 2744,
+ 2752,
+ 2760,
+ 2776,
+ 2792,
+ 2800,
+ 2808,
+ 2816,
+ 2824,
+ 2832,
+ 2840,
+ 2848,
+ 2856,
+ 2864,
+ 2872,
+ 2880,
+ 2888,
+ 2896,
+ 2904,
+ 2912,
+ 2920,
+ 2928,
+ 2944,
+ 2952,
+ 2960,
+ 2968,
+ 2976,
+ 2992,
+ 3000,
+ 3008,
+ 3016,
+ 3024,
+ 3040,
+ 3048,
+ 3064,
+ 3072,
+ 3080,
+ 3096,
+ 3112,
+ 3120,
+ 3128,
+ 3144,
+ 3152,
+ 3168,
+ 3176,
+ 3184,
+ 3192,
+ 3208,
+ 3224,
+ 3240,
+ 3248,
+ 3264,
+ 3280,
+ 3288,
+ 3296,
+ 3312,
+ 3328,
+ 3344,
+ 3360,
+ 3368,
+ 3384,
+ 3392,
+ 3400,
+ 3408,
+ 3416,
+ 3432,
+ 3440,
+ 3456,
+ 3464,
+ 3480,
+ 3496,
+ 3504,
+ 3512,
+ 3528,
+ 3536,
+ 3544,
+ 3560,
+ 3568,
+ 3576,
+ 3584,
+ 3592,
+ 3608,
+ 3616,
+ 3632,
+ 3640,
+ 3648,
+ 3656,
+ 3664,
+ 3680,
+ 3688,
+ 3696,
+ 3712,
+ 3720,
+ 3728,
+ 3736,
+ 3752,
+ 3760,
+ 3768,
+ 3776,
+ 3784,
+ 3792,
+ 3800,
+ 3816,
+ 3824,
+ 3840,
+ 3856,
+ 3872,
+ 3880,
+ 3888,
+ 3904,
+ 3912,
+ 3920,
+ 3928,
+ 3936,
+ 3944,
+ 3952,
+ 3960,
+ 3968,
+ 3984,
+ 4000,
+ 4008,
+ 4016,
+ 4024,
+ 4040,
+ 4048,
+ 4064,
+ 4080,
+ 4088,
+ 4096,
+ 4104,
+ 4112,
+ 4120,
+ 4128,
+ 4144,
+ 4160,
+ 4168,
+ 4184,
+ 4192,
+ 4200,
+ 4216,
+ 4224,
+ 4232,
+ 4248,
+ 4264,
+ 4272,
+ 4288,
+ 4304,
+ 4312,
+ 4320,
+ 4328,
+ 4336,
+ 4352,
+ 4360,
+ 4376,
+ 4392,
+ 4408,
+ 4416,
+ 4424,
+ 4432,
+ 4440,
+ 4456,
+ 4464,
+ 4472,
+ 4480,
+ 4488,
+ 4504,
+ 4520,
+ 4536,
+ 4544,
+ 4552,
+ 4560,
+ 4576,
+ 4592,
+ 4600,
+ 4608,
+ 4616,
+ 4624,
+ 4632,
+ 4640,
+ 4656,
+ 4672,
+ 4680,
+ 4688,
+ 4696,
+ 4712,
+ 4720,
+ 4728,
+ 4736,
+ 4744,
+ 4752,
+ 4768,
+ 4776,
+ 4784,
+ 4800,
+ 4808,
+ 4816,
+ 4824,
+ 4832,
+ 4840,
+ 4848,
+ 4856,
+ 4864,
+ 4880,
+ 4896,
+ 4912,
+ 4928,
+ 4944,
+ 4960,
+ 4976,
+ 4984,
+ 4992,
+ 5008,
+ 5024,
+ 5032,
+ 5040,
+};
+
+
+#define SCANKEYWORDS_NUM_KEYWORDS 473
+
diff --git a/contrib/bench_hash/bench_hash--1.0.sql b/contrib/bench_hash/bench_hash--1.0.sql
new file mode 100644
index 0000000000..43ce946bf6
--- /dev/null
+++ b/contrib/bench_hash/bench_hash--1.0.sql
@@ -0,0 +1,30 @@
+/* contrib/bench_hash/bench_hash--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bench_hash" to load this file. \quit
+
+CREATE FUNCTION bench_string_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+
+CREATE FUNCTION bench_cstring_hash_unaligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_cstring_hash_aligned(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_pgstat_hash(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION bench_pgstat_hash_FH(int4)
+RETURNS int
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/contrib/bench_hash/bench_hash.c b/contrib/bench_hash/bench_hash.c
new file mode 100644
index 0000000000..017cf333ce
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.c
@@ -0,0 +1,169 @@
+/*-------------------------------------------------------------------------
+ *
+ * bench_hash.c
+ *
+ * Copyright (c) 2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/bench_hash/bench_hash.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+#include "aligned_keywords.h"
+
+#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+#include "utils/pgstat_internal.h"
+
+
+PG_FUNCTION_INFO_V1(bench_string_hash);
+Datum
+bench_string_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len = strlen(&aligned_words[idx]);
+ hash += hash_bytes((const unsigned char *) &aligned_words[idx], s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_unaligned);
+Datum
+bench_cstring_hash_unaligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_unaligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+
+PG_FUNCTION_INFO_V1(bench_cstring_hash_aligned);
+Datum
+bench_cstring_hash_aligned(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ char* p = (char*) palloc(5048);
+ memcpy(p, aligned_words, 5048);
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++)
+ {
+ int idx = word_offsets[i];
+ int s_len;
+ fasthash_state hs;
+
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ s_len = fasthash_accum_cstring_aligned(&hs, &p[idx]);
+ hash += fasthash_final32(&hs, s_len);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+static inline uint32
+pgstat_hash_hash_key_orig(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+ uint32 hash;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ hash = murmurhash32(key->kind);
+ hash = hash_combine(hash, murmurhash32(key->dboid));
+ hash = hash_combine(hash, murmurhash32(key->objoid));
+
+ return hash;
+}
+
+static inline uint32
+pgstat_hash_hash_key_FH(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+
+ Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ return fasthash32((const char *) key, size, 0);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash);
+Datum
+bench_pgstat_hash(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_orig((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
+
+PG_FUNCTION_INFO_V1(bench_pgstat_hash_fh);
+Datum
+bench_pgstat_hash_fh(PG_FUNCTION_ARGS)
+{
+ int32 count = PG_GETARG_INT32(0);
+ uint32 hash = 0;
+
+ while (count-- > 0)
+ {
+ for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++)
+ {
+ int idx = word_offsets[i];
+ hash += pgstat_hash_hash_key_FH((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL);
+ }
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_INT32(hash);
+}
diff --git a/contrib/bench_hash/bench_hash.control b/contrib/bench_hash/bench_hash.control
new file mode 100644
index 0000000000..ffc63858d2
--- /dev/null
+++ b/contrib/bench_hash/bench_hash.control
@@ -0,0 +1,5 @@
+# bench_hash extension
+comment = 'benchmark some hash functions'
+default_version = '1.0'
+module_pathname = '$libdir/bench_hash'
+relocatable = true
diff --git a/contrib/bench_hash/meson.build b/contrib/bench_hash/meson.build
new file mode 100644
index 0000000000..f8d88d8b5c
--- /dev/null
+++ b/contrib/bench_hash/meson.build
@@ -0,0 +1,19 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+bench_hash_sources = files(
+ 'bench_hash.c',
+)
+
+bench_hash = shared_module('bench_hash',
+ bench_hash_sources,
+# link_with: pgport_srv,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += bench_hash
+
+install_data(
+ 'bench_hash.control',
+ 'bench_hash--1.0.sql',
+ kwargs: contrib_data_args,
+)
+
diff --git a/contrib/meson.build b/contrib/meson.build
index c12dc906ca..1c16f0d0a8 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('bench_hash')
subdir('adminpack')
subdir('amcheck')
subdir('auth_delay')
diff --git a/runbench.sh b/runbench.sh
new file mode 100755
index 0000000000..dfad1afa7f
--- /dev/null
+++ b/runbench.sh
@@ -0,0 +1,16 @@
+# create extension bench_hash;
+
+cat bench_string_hash.sql
+pgbench -n -T 20 -f bench_string_hash.sql -M prepared | grep latency
+
+cat bench_cstr_unaligned.sql
+pgbench -n -T 20 -f bench_cstr_unaligned.sql -M prepared | grep latency
+
+cat bench_cstr_aligned.sql
+pgbench -n -T 20 -f bench_cstr_aligned.sql -M prepared | grep latency
+
+cat bench_pgstat_orig.sql
+pgbench -n -T 20 -f bench_pgstat_orig.sql -M prepared | grep latency
+
+cat bench_pgstat_fh.sql
+pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency
--
2.43.0
v13-0004-Use-fasthash-for-the-search-path-cache.patchtext/x-patch; charset=US-ASCII; name=v13-0004-Use-fasthash-for-the-search-path-cache.patchDownload
From b9cbedf2ecda8d2929f3ec60f19532529259c7bb Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v13 4/6] Use fasthash for the search path cache
This serves to demonstrate the incremental API, allowing inlined
hash calculation without a strlen call. This brings the general case
performance closer to the optimization done in commit a86c61c9ee.
Jeff Davis, with some adjustments by me
Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
src/backend/catalog/namespace.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index eecc50a958..d1eae2a2d4 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -253,11 +253,16 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ fasthash_state hs;
+ int sp_len;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+
+ fasthash_accum(&hs, (const char*) &key.roleid, sizeof(Oid));
+ sp_len = fasthash_accum_cstring(&hs, key.searchPath);
+
+ /* pass the length to tweak the final mix */
+ return fasthash_final32(&hs, sp_len);
}
static inline bool
--
2.43.0
v13-0002-Use-fasthash-for-pgstat_hash_hash_key.patchtext/x-patch; charset=US-ASCII; name=v13-0002-Use-fasthash-for-pgstat_hash_hash_key.patchDownload
From ada0dcec91474e2c89afd79e6c9c35eeae88d875 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sat, 9 Dec 2023 16:24:56 +0700
Subject: [PATCH v13 2/6] Use fasthash for pgstat_hash_hash_key
Previously this called the 32-bit Murmur finalizer on the three elements,
then joined with hash_combine(). Fasthash is simpler, executes faster
and takes up less binary space. While the collision and bias behavior
were almost certainly fine with the previous coding, now we have
measurements to prove it.
Discussion:
---
src/include/utils/pgstat_internal.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9862589f36..bbbb35bcd8 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -777,15 +777,10 @@ static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32((const char *) key, size, 0);
}
/*
--
2.43.0
v13-0003-Add-optimized-string-hashing-to-hashfn_unstable..patchtext/x-patch; charset=US-ASCII; name=v13-0003-Add-optimized-string-hashing-to-hashfn_unstable..patchDownload
From 8117b3123216d8a3b2f2bf79804745b29733ddd1 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 18 Dec 2023 11:10:28 +0700
Subject: [PATCH v13 3/6] Add optimized string hashing to hashfn_unstable.h
Given an already-initialized hash state and a C-string,
accumulate the hash of the string into the hash state
and return the length for the caller to (optionally)
save for the finalizer. This avoids a strlen call.
If the string pointer is aligned, we can use a word-
at-a-time algorithm both for NUL lookahead and for computing
the remainder length up to the NUL. The latter was inspired
by NetBSD's strlen(). The aligned case is only used on 64-bit
platforms, since it's not worth the extra complexity for 32-bit.
Jeff Davis and John Naylor
Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com
---
src/include/common/hashfn_unstable.h | 114 ++++++++++++++++++++++++++-
1 file changed, 111 insertions(+), 3 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 5e882532d2..8d8952beb3 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -16,6 +16,9 @@ and may differ by hardware platform.
#ifndef HASHFN_UNSTABLE_H
#define HASHFN_UNSTABLE_H
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+
/*
* fasthash is a modification of code taken from
* https://code.google.com/archive/p/fast-hash/source/default/source
@@ -57,8 +60,8 @@ and may differ by hardware platform.
fasthash_state hs;
fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
-fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
-return fasthash_final32(&hs, <final length>);
+len = fasthash_accum_cstring(&hs, str);
+return fasthash_final32(&hs, len);
*/
@@ -66,11 +69,12 @@ return fasthash_final32(&hs, <final length>);
typedef struct fasthash_state
{
uint64 accum;
-#define FH_SIZEOF_ACCUM sizeof(uint64)
uint64 hash;
} fasthash_state;
+#define FH_SIZEOF_ACCUM 8
+StaticAssertDecl(sizeof(((fasthash_state*) 0)->accum) == FH_SIZEOF_ACCUM, "wrong size for size macro");
#define FH_UNKNOWN_LENGTH 1
@@ -151,6 +155,110 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
fasthash_combine(hs);
}
+/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL)
+
+/*
+ * With an aligned pointer, we consume the string a word at a time. Loading
+ * the word containing the NUL terminator cannot segfault since page boundaries
+ * are MAXALIGN'd. For that last word, only use bytes up to the NUL for the hash.
+ * The algorithm was adapted from NetBSD's strlen.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = start;
+ int remainder;
+ uint64 zero_bytes;
+
+ Assert(PointerIsAligned(start, uint64));
+ for (;;)
+ {
+ uint64 chunk = *(uint64 *)buf;
+
+ /*
+ * With little-endian representation, we can use this calculation,
+ * which sets bits in the first byte in the result word
+ * that corresponds to a zero byte in the original word.
+ * The rest of the bytes are indeterminate, so cannot be used
+ * on big-endian machines without either swapping or a bytewise check.
+ */
+#ifdef WORDS_BIGENDIAN
+ zero_bytes = haszero64(pg_bswap64(chunk));
+#else
+ zero_bytes = haszero64(chunk);
+#endif
+ if (zero_bytes)
+ break;
+
+ hs->accum = chunk;
+ fasthash_combine(hs);
+ buf += FH_SIZEOF_ACCUM;
+ }
+
+ /*
+ * Bytes with set bits will be 0x80, so
+ * calculate the first occurrence of a zero byte within the input word
+ * by counting the number of trailing (for LE)
+ * zeros and dividing the result by 8.
+ */
+ remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
+ fasthash_accum(hs, buf, remainder);
+ buf += remainder;
+
+ return buf - start;
+}
+
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ const char *buf = str;
+
+ while (*buf)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(hs, buf, chunk_len);
+ buf += chunk_len;
+ }
+
+ return buf - start;
+}
+
+/*
+ * Accumulate the input into the hash state
+ * and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= 8
+
+ int len;
+#ifdef USE_ASSERT_CHECKING
+ int len_check;
+ fasthash_state hs_check;
+
+ memcpy(&hs_check, hs, sizeof(fasthash_state));
+ len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
+#endif
+ if (PointerIsAligned(str, uint64))
+ {
+ len = fasthash_accum_cstring_aligned(hs, str);
+ Assert(hs_check.hash == hs->hash && len_check == len);
+ return len;
+ }
+#endif /* SIZEOF_VOID_P */
+
+ return fasthash_accum_cstring_unaligned(hs, str);
+}
+
/*
* The finalizer
*
--
2.43.0
v13-0001-Add-inlineable-incremental-hash-functions-for-in.patchtext/x-patch; charset=US-ASCII; name=v13-0001-Add-inlineable-incremental-hash-functions-for-in.patchDownload
From a44af6d75684ff4d5d039041b2827be4716cd159 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v13 1/6] Add inlineable, incremental hash functions for
in-memory use
A number of places hash NUL-terminated strings. Currently, we need
to call strlen first because hash_bytes needs the length. For short
strings the C library call has a large overhead, and strlen calls
show up prominently in profiles.
Per suggestion from Andres Freund, add hash functions with an
incremental interface. Instead of trying to whack around hash_bytes
while maintaining its current behavior on all platforms, we base
this work on fasthash (MIT licensed) which is simple, faster than
hash_bytes for inputs over 12 bytes long, and also passes the hash
function testing suite SMHasher.
The original functions have been reimplemented using our new
incremental interface to validate that this method will still give the
same answer, provided we have the input length ahead of time. Future
work will use these for some existing uses of simplehash and dynahash.
The new functionality lives in a new header hashfn_unstable.h. The
name implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being endian-independent.
Reviewed by Jeff Davis and (in an earlier version) Heikki Linnakangas
Discussion: https://postgr.es/m/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
src/include/common/hashfn_unstable.h | 219 +++++++++++++++++++++++++++
src/tools/pgindent/typedefs.list | 1 +
2 files changed, 220 insertions(+)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..5e882532d2
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,219 @@
+/*
+Building blocks for creating fast inlineable hash functions. The
+unstable designation is in contrast to hashfn.h, which cannot break
+compatibility because hashes can be written to disk and so must produce
+the same hashes between versions.
+
+The functions in this file are not guaranteed to be stable between versions,
+and may differ by hardware platform.
+
+ *
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * There are two interfaces available. Examples assume a 32-bit hash:
+ *
+ * 1) When the length is known ahead of time, use fasthash32().
+ * 2) When the length is not known, use the incremental interface. To
+ * ensure good results, keep track of the length and pass it to the finalizer:
+
+fasthash_state hs;
+fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>);
+return fasthash_final32(&hs, <final length>);
+
+*/
+
+
+typedef struct fasthash_state
+{
+ uint64 accum;
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+ uint64 hash;
+} fasthash_state;
+
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * "len" is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * "seed" can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* Both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* Accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ uint32 lower_four;
+
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ memcpy(&lower_four, k, sizeof(lower_four));
+ hs->accum |= lower_four;
+ break;
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * "tweak" is the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated
+ * strings, otherwise zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This provides a bit more mixing compared to
+ * just taking the lower 32 bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * The following trick converts the 64-bit hashcode to Fermat residue,
+ * which shall retain information from both the higher and lower parts of
+ * hashcode.
+ */
+ return h - (h >> 32);
+}
+
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using
+ * the incremental interface.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* Like fasthash64, but returns a 32-bit hash */
+static inline uint64
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5fd46b7bd1..eb2e6b6309 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3329,6 +3329,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.43.0
On Fri, Jan 5, 2024 at 6:54 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Thu, Jan 4, 2024 at 10:01 AM jian he <jian.universality@gmail.com> wrote:
I still cannot git apply your patch cleanly. in
I don't know why you're using that -- the git apply man page even says
"Use git-am(1) to create commits from patches generated by
git-format-patch(1) and/or received by email." Or, if that fails, use "patch".
On http://cfbot.cputube.org/ I cannot find your patch
(so it might be that I tested based on incomplete information),
but only hashfn_unstable.h influences bench_hash/bench_hash.c, so I attached the whole patch that I had git applied, that is, the changes I applied for the following tests.
Well, aside from the added text-editor detritus, it looks like this
has everything except v11-0008, without which I still get improvement
for the pgstat hash.
Model name: Intel(R) Core(TM) i5-14600K
The following was tested with another machine; the machine spec is also listed below.
I tested 3 times; the results are very similar, as follows:
select * from bench_cstring_hash_aligned(100000); 4705.686 ms
select * from bench_cstring_hash_unaligned(100000); 6835.753 ms
select * from bench_pgstat_hash(100000); 2678.978 ms
select * from bench_pgstat_hash_fh(100000); 6199.017 ms
select * from bench_string_hash(100000); 847.699 ms
I was fully prepared to believe something like 32-bit Arm would have
difficulty with 64-bit shifts/multiplies etc., but this makes no sense
at all. In this test, on my machine, HEAD's pgstat_hash is 3x faster
than HEAD's "strlen + hash_bytes", but for you it's 3x slower. To
improve reproducibility, I've added the .sql files and a bench script
to v13. I invite you to run bench_hash.sh and see if that changes
anything.
git apply has a verbose option.
Also, personally I use the vscode editor and its coloring to view the changes.
jian@jian:~/Desktop/pg_src/src4/postgres$ git apply
$PATCHES/v13-0006-Add-benchmarks-for-hashing.patch
/home/jian/Downloads/patches/v13-0006-Add-benchmarks-for-hashing.patch:81:
indent with spaces.
if (/^PG_KEYWORD\("(\w+)"/)
/home/jian/Downloads/patches/v13-0006-Add-benchmarks-for-hashing.patch:82:
indent with spaces.
{
/home/jian/Downloads/patches/v13-0006-Add-benchmarks-for-hashing.patch:87:
indent with spaces.
}
/home/jian/Downloads/patches/v13-0006-Add-benchmarks-for-hashing.patch:89:
trailing whitespace.
/home/jian/Downloads/patches/v13-0006-Add-benchmarks-for-hashing.patch:92:
trailing whitespace.
warning: squelched 11 whitespace errors
warning: 16 lines add whitespace errors.
jian@jian:~/Desktop/pg_src/src4/postgres$ bash runbench.sh
select * from bench_string_hash(100000);
latency average = 875.482 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 6539.231 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 4401.278 ms
select * from bench_pgstat_hash(100000);
latency average = 2679.732 ms
select * from bench_pgstat_hash_fh(100000);
latency average = 5711.012 ms
jian@jian:~/Desktop/pg_src/src4/postgres$ bash runbench.sh
select * from bench_string_hash(100000);
latency average = 874.261 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 6538.874 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 4400.546 ms
select * from bench_pgstat_hash(100000);
latency average = 2682.013 ms
select * from bench_pgstat_hash_fh(100000);
latency average = 5709.815 ms
meson:
meson setup ${BUILD} \
-Dprefix=${PG_PREFIX} \
-Dpgport=5459 \
-Dplperl=enabled \
-Dplpython=enabled \
-Dssl=openssl \
-Dldap=enabled \
-Dlibxml=enabled \
-Dlibxslt=enabled \
-Duuid=e2fs \
-Dzstd=enabled \
-Dlz4=enabled \
-Dsystemd=enabled \
-Dcassert=true \
-Db_coverage=true \
-Dicu=enabled \
-Dbuildtype=debug \
-Dwerror=true \
-Dc_args='-Wunused-variable
-Wuninitialized
-Werror=maybe-uninitialized
-Wreturn-type
-DWRITE_READ_PARSE_PLAN_TREES
-DCOPY_PARSE_PLAN_TREES
-DREALLOCATE_BITMAPSETS
-DRAW_EXPRESSION_COVERAGE_TEST -fno-omit-frame-pointer' \
-Ddocs_pdf=disabled \
-Dllvm=disabled \
-Ddocs_pdf=disabled
On Fri, Jan 5, 2024 at 6:58 PM jian he <jian.universality@gmail.com> wrote:
-Dcassert=true \
-Dbuildtype=debug \
These probably don't matter much for this test, but these should be
off for any performance testing.
-DWRITE_READ_PARSE_PLAN_TREES
-DCOPY_PARSE_PLAN_TREES
-DREALLOCATE_BITMAPSETS
-DRAW_EXPRESSION_COVERAGE_TEST
I'd guess it was one of these, which should likewise be off as well.
On Sat, Jan 6, 2024 at 9:04 AM John Naylor <johncnaylorls@gmail.com> wrote:
On Fri, Jan 5, 2024 at 6:58 PM jian he <jian.universality@gmail.com> wrote:
-Dcassert=true \
-Dbuildtype=debug \
These probably don't matter much for this test, but these should be
off for any performance testing.
-DWRITE_READ_PARSE_PLAN_TREES
-DCOPY_PARSE_PLAN_TREES
-DREALLOCATE_BITMAPSETS
-DRAW_EXPRESSION_COVERAGE_TEST
I'd guess it was one of these, which should likewise be off as well.
Thanks for pointing it out.
meson setup ${BUILD} \
-Dprefix=${PG_PREFIX} \
-Dpgport=5459 \
-Dplperl=enabled \
-Dplpython=enabled \
-Dssl=openssl \
-Dldap=enabled \
-Dlibxml=enabled \
-Dlibxslt=enabled \
-Duuid=e2fs \
-Dzstd=enabled \
-Dlz4=enabled \
-Dsystemd=enabled \
-Dicu=enabled \
-Dbuildtype=release \
-Ddocs_pdf=disabled \
-Dllvm=disabled \
-Ddocs_pdf=disabled
now the results:
jian@jian:~/Desktop/pg_src/src4/postgres$ bash
/home/jian/Desktop/pg_src/src4/postgres/runbench.sh
select * from bench_string_hash(100000);
latency average = 145.021 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 100.829 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 100.606 ms
select * from bench_pgstat_hash(100000);
latency average = 96.140 ms
select * from bench_pgstat_hash_fh(100000);
latency average = 62.784 ms
jian@jian:~/Desktop/pg_src/src4/postgres$ bash
/home/jian/Desktop/pg_src/src4/postgres/runbench.sh
select * from bench_string_hash(100000);
latency average = 147.782 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 101.179 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 101.219 ms
select * from bench_pgstat_hash(100000);
latency average = 96.357 ms
select * from bench_pgstat_hash_fh(100000);
latency average = 62.902 ms
On Sat, Jan 6, 2024 at 9:01 AM jian he <jian.universality@gmail.com> wrote:
latency average = 147.782 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 101.179 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 101.219 ms
Thanks for testing again! This looks closer to my results. It doesn't
show improvement for the aligned case, but it's not worse, either.
There is still some polishing to be done, mostly on comments/examples,
but I think it's mostly there. I'll return to it by next week.
Hi John,
On Mon, Jan 8, 2024 at 10:44 AM John Naylor <johncnaylorls@gmail.com> wrote:
On Sat, Jan 6, 2024 at 9:01 AM jian he <jian.universality@gmail.com> wrote:
latency average = 147.782 ms
select * from bench_cstring_hash_unaligned(100000);
latency average = 101.179 ms
select * from bench_cstring_hash_aligned(100000);
latency average = 101.219 ms
Thanks for testing again! This looks closer to my results. It doesn't
show improvement for the aligned case, but it's not worse, either.
There is still some polishing to be done, mostly on comments/examples,
but I think it's mostly there. I'll return to it by next week.
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
A kind reminder, it's already 2024 :)
I'm also curious about the 2018; is there any convention for that?
--
Regards
Junwang Zhao
On Mon, Jan 8, 2024 at 2:24 PM Junwang Zhao <zhjwpku@gmail.com> wrote:
+ * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group
A kind reminder, it's already 2024 :)
I'm also curious why the 2018, is there any convention for that?
The convention I followed was "blind copy-paste", but the first year
is supposed to be when the file entered the repo. Thanks, will fix.
I spent some time rewriting the comments and a couple other cosmetic
changes, and squashed into two patches: the second one has the
optimized string hashing. They each have still just one demo use case.
It looks pretty close to commitable, but I'll leave this up for a few
days in case anyone wants to have another look.
After this first step is out of the way, we can look into using this
more widely, including dynahash and the GUC hash.
Attachments:
v14-0002-Add-optimized-C-string-hashing.patchtext/x-patch; charset=US-ASCII; name=v14-0002-Add-optimized-C-string-hashing.patchDownload
From cff2bfda6a3067936ef17162a2db2609185afb24 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 16 Jan 2024 16:32:48 +0700
Subject: [PATCH v14 2/2] Add optimized C string hashing
Given an already-initialized hash state and a NUL-terminated string,
accumulate the hash of the string into the hash state and return the
length for the caller to (optionally) save for the finalizer. This
avoids a strlen call.
If the string pointer is aligned, we can use a word-at-a-time
algorithm for NUL lookahead. The aligned case is only used on 64-bit
platforms, since it's not worth the extra complexity for 32-bit.
As demonstration, use this in the search path cache. This brings the
general case performance closer to the optimization done in commit
a86c61c9ee.
There are other places that could benefit from this, but that is left
for future work.
Handling the tail of the string after finishing the word-wise loop
was inspired by NetBSD's strlen(), but no code was taken since that
is written in assembly language.
Jeff Davis and John Naylor
Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com
Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com
---
src/backend/catalog/namespace.c | 20 +++--
src/include/common/hashfn_unstable.h | 130 +++++++++++++++++++++++++++
2 files changed, 145 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index eecc50a958..b610aa6242 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -41,7 +41,7 @@
#include "catalog/pg_ts_template.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -253,11 +253,21 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
static inline uint32
spcachekey_hash(SearchPathCacheKey key)
{
- const unsigned char *bytes = (const unsigned char *) key.searchPath;
- int blen = strlen(key.searchPath);
+ fasthash_state hs;
+ int sp_len;
- return hash_combine(hash_bytes(bytes, blen),
- hash_uint32(key.roleid));
+ fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+
+ hs.accum = key.roleid;
+ fasthash_combine(&hs);
+
+ /*
+ * Combine search path into the hash and save the length for tweaking the
+ * final mix.
+ */
+ sp_len = fasthash_accum_cstring(&hs, key.searchPath);
+
+ return fasthash_final32(&hs, sp_len);
}
static inline bool
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 88fa613d4e..ff36114379 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -61,6 +61,24 @@
+ * 2) Incremental interface. This can be used for incorporating multiple
+ * inputs. The standalone functions use this internally, so see fasthash64()
+ * for an example of how this works.
+ *
+ * The incremental interface is especially useful if any of the inputs
+ * are NUL-terminated C strings, since the length is not needed ahead
+ * of time. This avoids needing to call strlen(). This case is optimized
+ * in fasthash_accum_cstring() :
+ *
+ * fasthash_state hs;
+ * fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ * len = fasthash_accum_cstring(&hs, *str);
+ * ...
+ * return fasthash_final32(&hs, len);
+ *
+ * Here we pass FH_UNKNOWN_LENGTH as a convention, since passing zero
+ * would zero out the internal seed as well. fasthash_accum_cstring()
+ * returns the length of the string, which is computed on-the-fly while
+ * mixing the string into the hash. Experimentation has found that
+ * SMHasher fails unless we incorporate the length, so it is passed to
+ * the finalizer as a tweak.
*/
@@ -154,6 +172,118 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
fasthash_combine(hs);
}
+/*
+ * Set high bit in lowest byte where the input is zero, from:
+ * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ */
+#define haszero64(v) \
+ (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+
+/*
+ * all-purpose workhorse for fasthash_accum_cstring
+ */
+static inline int
+fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+
+ while (*str)
+ {
+ int chunk_len = 0;
+
+ while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
+ chunk_len++;
+
+ fasthash_accum(hs, str, chunk_len);
+ str += chunk_len;
+ }
+
+ return str - start;
+}
+
+/*
+ * specialized workhorse for fasthash_accum_cstring
+ *
+ * With an aligned pointer, we consume the string a word at a time.
+ * Loading the word containing the NUL terminator cannot segfault since
+ * allocation boundaries are suitably aligned.
+ */
+static inline int
+fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ int remainder;
+ uint64 zero_bytes_le;
+
+ Assert(PointerIsAligned(start, uint64));
+ for (;;)
+ {
+ uint64 chunk = *(uint64 *) str;
+
+ /*
+ * With little-endian representation, we can use this calculation,
+ * which sets bits in the first byte in the result word that
+ * corresponds to a zero byte in the original word. The rest of the
+ * bytes are indeterminate, so cannot be used on big-endian machines
+ * without either swapping or a bytewise check.
+ */
+#ifdef WORDS_BIGENDIAN
+ zero_bytes_le = haszero64(pg_bswap(chunk));
+#else
+ zero_bytes_le = haszero64(chunk);
+#endif
+ if (zero_bytes_le)
+ break;
+
+ hs->accum = chunk;
+ fasthash_combine(hs);
+ str += FH_SIZEOF_ACCUM;
+ }
+
+ /*
+ * For the last word, only use bytes up to the NUL for the hash. Bytes
+ * with set bits will be 0x80, so calculate the first occurrence of a zero
+ * byte within the input word by counting the number of trailing (because
+ * little-endian) zeros and dividing the result by 8.
+ */
+ remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+ fasthash_accum(hs, str, remainder);
+ str += remainder;
+
+ return str - start;
+}
+
+/*
+ * Mix 'str' into the hash state and return the length of the string.
+ */
+static inline int
+fasthash_accum_cstring(fasthash_state *hs, const char *str)
+{
+#if SIZEOF_VOID_P >= 8
+
+ int len;
+#ifdef USE_ASSERT_CHECKING
+ int len_check;
+ fasthash_state hs_check;
+
+ memcpy(&hs_check, hs, sizeof(fasthash_state));
+ len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
+#endif
+ if (PointerIsAligned(str, uint64))
+ {
+ len = fasthash_accum_cstring_aligned(hs, str);
+ Assert(hs_check.hash == hs->hash && len_check == len);
+ return len;
+ }
+#endif /* SIZEOF_VOID_P */
+
+ /*
+ * It's not worth it to try to make the word-at-a-time optimization work
+ * on 32-bit platforms.
+ */
+ return fasthash_accum_cstring_unaligned(hs, str);
+}
+
/*
* The finalizer
*
--
2.43.0
v14-0001-Add-inline-incremental-hash-functions-for-in-mem.patchtext/x-patch; charset=US-ASCII; name=v14-0001-Add-inline-incremental-hash-functions-for-in-mem.patchDownload
From 1f484009e93277525603acc4ece31412eaf173cb Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 27 Nov 2023 17:03:38 +0700
Subject: [PATCH v14 1/2] Add inline incremental hash functions for in-memory
use
It can be useful for a hash function to expose separate initialization,
accumulation, and finalization steps. In particular, this is useful
for building inline hash functions for simplehash. Instead of trying
to whack around hash_bytes while maintaining its current behavior on
all platforms, we base this work on fasthash (MIT licensed) which
is simple, faster than hash_bytes for inputs over 12 bytes long,
and also passes the hash function testing suite SMHasher.
The fasthash functions have been reimplemented using our added-on
incremental interface to validate that this method will still give
the same answer, provided we have the input length ahead of time.
This functionality lives in a new header hashfn_unstable.h. The name
implies we have the freedom to change things across versions that
would be unacceptable for our other hash functions that are used for
e.g. hash indexes and hash partitioning. As such, these should only
be used for in-memory data structures like hash tables. There is also
no guarantee of being independent of endianness or pointer size.
As demonstration, use fasthash for pgstat_hash_hash_key. Previously
this called the 32-bit murmur finalizer on the three elements,
then joined them with hash_combine(). The new function is simpler,
faster and takes up less binary space. While the collision and bias
behavior were almost certainly fine with the previous coding, now we
have objective confidence of that.
There are other places that could benefit from this, but that is left
for future work.
Reviewed by Jeff Davis, Heikki Linnakangas, Jian He, Junwang Zhao
Credit to Andres Freund for the idea
Discussion: https://postgr.es/m/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de
---
src/include/common/hashfn_unstable.h | 224 +++++++++++++++++++++++++++
src/include/utils/pgstat_internal.h | 11 +-
src/tools/pgindent/typedefs.list | 1 +
3 files changed, 228 insertions(+), 8 deletions(-)
create mode 100644 src/include/common/hashfn_unstable.h
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644
index 0000000000..88fa613d4e
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,224 @@
+/*
+ * hashfn_unstable.h
+ *
+ * Building blocks for creating fast inlineable hash functions. The
+ * unstable designation is in contrast to hashfn.h, which cannot break
+ * compatibility because hashes can be written to disk and so must produce
+ * the same hashes between versions.
+ *
+ * The functions in this file are not guaranteed to be stable between
+ * versions, and may differ by hardware platform.
+ *
+ *
+ * Portions Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT licencse. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+ Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/*
+ * fasthash as implemented here has two interfaces:
+ *
+ * 1) Standalone functions, e.g. fasthash32() for a single value with a
+ * known length.
+ *
+ * 2) Incremental interface. This can be used for incorporating multiple
+ * inputs. The standalone functions use this internally, so see fasthash64()
+ * for an example of how this works.
+ */
+
+
+typedef struct fasthash_state
+{
+ /* staging area for chunks of input */
+ uint64 accum;
+
+ uint64 hash;
+} fasthash_state;
+
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * 'len' is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * 'seed' can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+ memset(hs, 0, sizeof(fasthash_state));
+ hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+ h ^= (h >> 23) + tweak;
+ h *= 0x2127599bf4325c37;
+ h ^= h >> 47;
+ return h;
+}
+
+/* combine one chunk of input into the hash */
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+ hs->hash ^= fasthash_mix(hs->accum, 0);
+ hs->hash *= 0x880355f21e6d1965;
+
+ /* reset hash state for next input */
+ hs->accum = 0;
+}
+
+/* accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+ uint32 lower_four;
+
+ Assert(hs->accum == 0);
+ Assert(len <= FH_SIZEOF_ACCUM);
+
+ switch (len)
+ {
+ case 8:
+ memcpy(&hs->accum, k, 8);
+ break;
+ case 7:
+ hs->accum |= (uint64) k[6] << 48;
+ /* FALLTHROUGH */
+ case 6:
+ hs->accum |= (uint64) k[5] << 40;
+ /* FALLTHROUGH */
+ case 5:
+ hs->accum |= (uint64) k[4] << 32;
+ /* FALLTHROUGH */
+ case 4:
+ memcpy(&lower_four, k, sizeof(lower_four));
+ hs->accum |= lower_four;
+ break;
+ case 3:
+ hs->accum |= (uint64) k[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ hs->accum |= (uint64) k[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ hs->accum |= (uint64) k[0];
+ break;
+ case 0:
+ return;
+ }
+
+ fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * 'tweak' is intended to be the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated strings, otherwise
+ * zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This optional step provides a bit of additional mixing compared to
+ * just taking the lower 32 bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+ /*
+ * Convert the 64-bit hashcode to Fermat residue, which shall retain
+ * information from both the higher and lower parts of hashcode.
+ */
+ return h - (h >> 32);
+}
+
+/* finalize and reduce */
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+ return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using the incremental
+ * interface. Returns a 64-bit hashcode. 'len' controls not only how
+ * many bytes to hash, but also modifies the internal seed.
+ * 'seed' can be zero.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+ fasthash_state hs;
+
+ fasthash_init(&hs, len, seed);
+
+ while (len >= FH_SIZEOF_ACCUM)
+ {
+ fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+ k += FH_SIZEOF_ACCUM;
+ len -= FH_SIZEOF_ACCUM;
+ }
+
+ fasthash_accum(&hs, k, len);
+ return fasthash_final64(&hs, 0);
+}
+
+/* like fasthash64, but returns a 32-bit hashcode */
+static inline uint64
+fasthash32(const char *k, int len, uint64 seed)
+{
+ return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9862589f36..207944f100 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
#define PGSTAT_INTERNAL_H
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "lib/ilist.h"
#include "pgstat.h"
@@ -776,16 +776,11 @@ pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg)
static inline uint32
pgstat_hash_hash_key(const void *d, size_t size, void *arg)
{
- const PgStat_HashKey *key = (PgStat_HashKey *) d;
- uint32 hash;
+ const char *key = (const char *) d;
Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
- hash = murmurhash32(key->kind);
- hash = hash_combine(hash, murmurhash32(key->dboid));
- hash = hash_combine(hash, murmurhash32(key->objoid));
-
- return hash;
+ return fasthash32(key, size, 0);
}
/*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5fd46b7bd1..eb2e6b6309 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3329,6 +3329,7 @@ exec_thread_arg
execution_state
explain_get_index_name_hook_type
f_smgr
+fasthash_state
fd_set
fe_scram_state
fe_scram_state_enum
--
2.43.0
On 17/01/2024 09:15, John Naylor wrote:
/*
* hashfn_unstable.h
*
* Building blocks for creating fast inlineable hash functions. The
* unstable designation is in contrast to hashfn.h, which cannot break
* compatibility because hashes can be written to disk and so must produce
* the same hashes between versions.
*
* The functions in this file are not guaranteed to be stable between
* versions, and may differ by hardware platform.
These paragraphs sound a bit awkward. It kind of buries the lede, the
"these functions are not guaranteed to be stable" part, to the bottom.
Maybe something like:
"
Building blocks for creating fast inlineable hash functions. The
functions in this file are not guaranteed to be stable between versions,
and may differ by hardware platform. Hence they must not be used in
indexes or other on-disk structures. See hashfn.h if you need stability.
"
typo: licencse
Other than that, LGTM.
--
Heikki Linnakangas
Neon (https://neon.tech)
On Wed, Jan 17, 2024 at 9:54 PM Heikki Linnakangas <hlinnaka@iki.fi> wrote:
Maybe something like:
"
Building blocks for creating fast inlineable hash functions. The
functions in this file are not guaranteed to be stable between versions,
and may differ by hardware platform. Hence they must not be used in
indexes or other on-disk structures. See hashfn.h if you need stability.
"typo: licencse
Other than that, LGTM.
Pushed that way, thanks! After fixing another typo in big endian
builds, an s390x member reported green, so I think that aspect is
working now. I'll come back to follow-up topics shortly.
On 19/01/2024 09:27, John Naylor wrote:
Pushed that way, thanks! After fixing another typo in big endian
builds, an s390x member reported green, so I think that aspect is
working now. I'll come back to follow-up topics shortly.
Thanks! I started to look at how to use this, and I have some questions.
I'd like to replace this murmurhash usage in resowner.c with this:
/*
* Most resource kinds store a pointer in 'value', and pointers are unique
* all on their own. But some resources store plain integers (Files and
* Buffers as of this writing), so we want to incorporate the 'kind' in
* the hash too, otherwise those resources will collide a lot. But
* because there are only a few resource kinds like that - and only a few
* resource kinds to begin with - we don't need to work too hard to mix
* 'kind' into the hash. Just add it with hash_combine(), it perturbs the
* result enough for our purposes.
*/
#if SIZEOF_DATUM == 8
return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
#else
return hash_combine(murmurhash32((uint32) value), (uint32) kind);
#endif
The straightforward replacement would be:
fasthash_state hs;
fasthash_init(&hs, sizeof(Datum), 0);
fasthash_accum(&hs, (char *) &kind, sizeof(ResourceOwnerDesc *));
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0);
But I wonder if it would be OK to abuse the 'seed' and 'tweak'
parameters to the init and final functions instead, like this:
fasthash_state hs;
fasthash_init(&hs, sizeof(Datum), (uint64) kind);
return fasthash_final32(&hs, (uint64) value);
I couldn't find any guidance on what properties the 'seed' and 'tweak'
have, compared to just accumulating the values with accum. Anyone know?
--
Heikki Linnakangas
Neon (https://neon.tech)
On Fri, 2024-01-19 at 14:27 +0700, John Naylor wrote:
Pushed that way, thanks!
Thank you.
One post-commit question on 0aba255440: why do
haszero64(pg_bswap64(chunk)) rather than just haszero64(chunk)? How
does byteswapping affect whether a zero byte exists or not?
Regards,
Jeff Davis
On Fri, 2024-01-19 at 13:38 -0800, Jeff Davis wrote:
One post-commit question on 0aba255440: why do
haszero64(pg_bswap64(chunk)) rather than just haszero64(chunk)? How
does byteswapping affect whether a zero byte exists or not?
I missed that it was used later when finding the rightmost one
position.
The placement of the comment was slightly confusing. Is:
haszero64(pg_bswap64(chunk)) == pg_bswap64(haszero64(chunk))
? If so, perhaps we can do the byte swapping outside of the loop, which
might save a few cycles on longer strings and would be more readable.
Regards,
Jeff Davis
On Fri, Jan 19, 2024 at 11:54 PM Heikki Linnakangas <hlinnaka@iki.fi> wrote:
Thanks! I started to look at how to use this, and I have some questions.
I'd like to replace this murmurhash usage in resowner.c with this:
/*
* Most resource kinds store a pointer in 'value', and pointers are unique
* all on their own. But some resources store plain integers (Files and
* Buffers as of this writing), so we want to incorporate the 'kind' in
* the hash too, otherwise those resources will collide a lot. But
* because there are only a few resource kinds like that - and only a few
* resource kinds to begin with - we don't need to work too hard to mix
* 'kind' into the hash. Just add it with hash_combine(), it perturbs the
* result enough for our purposes.
*/
#if SIZEOF_DATUM == 8
return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
#else
return hash_combine(murmurhash32((uint32) value), (uint32) kind);
#endif
The straightforward replacement would be:
fasthash_state hs;
fasthash_init(&hs, sizeof(Datum), 0);
fasthash_accum(&hs, (char *) &kind, sizeof(ResourceOwnerDesc *));
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0);
That would give the fullest mixing possible, more than currently.
But I wonder if it would be OK to abuse the 'seed' and 'tweak'
parameters to the init and final functions instead, like this:
fasthash_state hs;
fasthash_init(&hs, sizeof(Datum), (uint64) kind);
return fasthash_final32(&hs, (uint64) value);
This would go in the other direction, and sacrifice some quality for
speed. The fasthash finalizer is pretty short -- XMX, where X is
"right shift and XOR" and M is "multiply". In looking at some other
hash functions, it seems that's often done only if the input has
already had some mixing. The Murmur finalizer has the shape XMXMX, and
that seems to be the preferred way to get good mixing on a single
register-sized value. For that reason, hash functions whose main loop
is designed for long inputs will often skip that for small inputs and
just go straight to a Murmur-style finalizer. Fasthash doesn't do
that, so for a small input it ends up doing XMXM then XMX, which is a
little more expensive.
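To make those shapes concrete, here is a hedged sketch (not code from
the patch): the XMX body uses the fasthash_mix() constant from the
committed header, and the XMXMX body uses the well-known 64-bit
Murmur3 finalizer constants as one example of that shape.

/* XMX, the shape of fasthash_mix() (tweak omitted) */
static inline uint64
mix_xmx(uint64 h)
{
	h ^= h >> 23;							/* X */
	h *= UINT64CONST(0x2127599bf4325c37);	/* M */
	h ^= h >> 47;							/* X */
	return h;
}

/* XMXMX, a Murmur-style finalizer for a single register-sized value */
static inline uint64
mix_xmxmx(uint64 h)
{
	h ^= h >> 33;
	h *= UINT64CONST(0xff51afd7ed558ccd);
	h ^= h >> 33;
	h *= UINT64CONST(0xc4ceb9fe1a85ec53);
	h ^= h >> 33;
	return h;
}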
I couldn't find any guidance on what properties the 'seed' and 'tweak'
have, compared to just accumulating the values with accum. Anyone know?
In Postgres, I only know of one use of a seed parameter, to create two
independent hash functions from hash_bytes_uint32_extended(), in
brin-bloom indexes. I think that's the more typical use for a seed.
Since there was no guidance with the existing hash functions, and it's
a widespread concept, I didn't feel the need to put any here. We could
change that.
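As a minimal sketch of that typical use (hypothetical helper; the
seed constant is arbitrary):

/* derive two independent hashes of one key by varying only the seed */
static inline void
fasthash64_pair(const char *k, int len, uint64 *h1, uint64 *h2)
{
	*h1 = fasthash64(k, len, 0);
	*h2 = fasthash64(k, len, UINT64CONST(0x9e3779b97f4a7c15));
}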
I modeled the finalizer tweak on one of the finalizers in xxHash that
also used it only for the input length. Length is used as a tiebreaker
where otherwise it will often not collide anyway, so it seems that's
how we should think about using it elsewhere. There is a comment above
fasthash_final64 mentioning that the tweak is used for length when
that is not known ahead of time, but it might be good to generalize
that, and maybe put it somewhere more prominent. With that in mind,
I'm not sure "value" is a good fit for the tweak. "kind" is sort of
in the middle because IIUC it doesn't mattter at all for pointer
values, but it's important for other kinds, which would commonly
collide.
If I were to change from murmur64, I'd probably go in between the two
extremes mentioned earlier, and mix "value" normally and pass "kind"
as the seed:
fasthash_state hs;
fasthash_init(&hs, sizeof(Datum), kind);
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0);
On Sat, Jan 20, 2024 at 7:13 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Fri, 2024-01-19 at 13:38 -0800, Jeff Davis wrote:
One post-commit question on 0aba255440: why do
haszero64(pg_bswap64(chunk)) rather than just haszero64(chunk)? How
does byteswapping affect whether a zero byte exists or not?
I missed that it was used later when finding the rightmost one
position.
The placement of the comment was slightly confusing. Is:
haszero64(pg_bswap64(chunk)) == pg_bswap64(haszero64(chunk))
? If so, perhaps we can do the byte swapping outside of the loop, which
might save a few cycles on longer strings and would be more readable.
The above identity is not true for this haszero64 macro. I phrased it
as "The rest of the bytes are indeterminate", but that's not very
clear. It can only be true if it set bytes for all and only those
bytes where the input had zeros. In the NetBSD strlen source, there is
a comment telling of a way to do this:
~(((x & 0x7f....7f) + 0x7f....7f) | (x | 0x7f....7f))
https://github.com/NetBSD/src/blob/trunk/common/lib/libc/arch/x86_64/string/strlen.S
(They don't actually use it of course, since x86_64 is little-endian)
From the commentary there, it sounds like 1 or 2 more instructions.
One unmentioned assumption I had was that the byte swap would be a
single instruction on all platforms where we care about performance
(*). If that's not the case, we could switch to the above macro for
big-endian machines. It'd be less readable since we'd then need an
additional #ifdef for counting leading, rather than trailing zeros
(that would avoid byte-swapping entirely). Either way, I'm afraid
big-endian is stuck doing a bit of extra work somewhere. That work
could be amortized by doing a quick check in the loop and afterwards
completely redoing the zero check (or a bytewise check same as the
unaligned path), but that would penalize short strings.
(*) 32-bit platforms don't take this path, but mamba's build failed
because the previously-misspelled symbol was still in the source file.
We could also #ifdef around the whole aligned-path function, although
it's redundant.
I hope this makes it more clear. Maybe the comment could use some work.
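For what it's worth, here is a small worked example (assumed values,
written as a hypothetical test function) of why only the lowest
flagged byte is trustworthy:

static void
haszero64_demo(void)
{
	/* little-endian: byte 0 is 0x00 (a real zero), byte 1 is 0x01 */
	uint64		v = UINT64CONST(0x6162636465660100);
	uint64		z = haszero64(v);

	/* the borrow out of byte 0 falsely flags byte 1 as well */
	Assert(z == UINT64CONST(0x8080));

	/* but the lowest set bit still locates the first zero byte */
	Assert(pg_rightmost_one_pos64(z) / BITS_PER_BYTE == 0);
}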
On Sat, 2024-01-20 at 13:48 +0700, John Naylor wrote:
The above identity is not true for this haszero64 macro.
I see.
I hope this makes it more clear. Maybe the comment could use some
work.
Yes, thank you. I don't think we need to change the algorithm.
After having stepped away from this work for a couple weeks and
returning to it, I think the comments and/or naming could be more
clear. We first use the result of haszero64() as a boolean to break out
of the loop, but then later use it in a more interesting way to count
the number of remaining bytes.
Perhaps you can take the comment out of the loop and just describe the
algorithm we're using, and make a note that we have to byteswap first.
"Indeterminate" could be explained briefly as well.
These are minor comments.
Regards,
Jeff Davis
I wrote:
fasthash_init(&hs, sizeof(Datum), kind);
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0);
It occurred to me that it's strange to have two places that length can
be passed. That was a side effect of the original, which used length
to both know how many bytes to read, and to modify the internal seed.
With the incremental API, it doesn't make sense to pass the length (or
a dummy macro) up front -- with a compile-time fixed length, it can't
possibly break a tie, so it's just noise.
0001 removes the length from initialization in the incremental
interface. The standalone functions use length directly the same as
before, but after initialization. Thoughts?
Also, the fasthash_accum call is a bit verbose, because it's often
used in a loop with varlen input. For register-sized values, I think
it's simpler to say this, as done in the search path cache, so maybe a
comment to that effect would be helpful:
hs.accum = value;
fasthash_combine(&hs);
I noticed that we already have a more recent, stronger 64-bit mixer
than murmur64: splitmix64, in pg_prng.c. We could put that, as well as
a better 4-byte mixer [1] in hashfn_unstable.h, for in-memory use.
Maybe with names like "hash_4bytes" etc. so it's not tied to a
specific implementation. I see one simplehash case that can use it,
even if the resowner hash table gets rid of it.
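For reference, a sketch of what those could look like; the names are
just the ones floated above, the 64-bit body is the splitmix64
finalizer already in pg_prng.c, and the 32-bit body simply reuses
murmurhash32 from hashfn.h as a placeholder (the boost link below has
a stronger candidate):

static inline uint64
hash_8bytes(uint64 h)
{
	/* splitmix64 finalizer, as in pg_prng.c */
	h = (h ^ (h >> 30)) * UINT64CONST(0xbf58476d1ce4e5b9);
	h = (h ^ (h >> 27)) * UINT64CONST(0x94d049bb133111eb);
	return h ^ (h >> 31);
}

static inline uint32
hash_4bytes(uint32 h)
{
	/* placeholder implementation */
	return murmurhash32(h);
}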
0002 and 0003 use fasthash for dynahash and GUC hash, respectively.
These cannot use the existing cstring hashing directly because of
truncation and case-folding, respectively. (Some simplehash uses can,
but that can come later)
On Sun, Jan 21, 2024 at 8:06 AM Jeff Davis <pgsql@j-davis.com> wrote:
After having stepped away from this work for a couple weeks and
returning to it, I think the comments and/or naming could be more
clear. We first use the result of haszero64() as a boolean to break out
of the loop, but then later use it in a more interesting way to count
the number of remaining bytes.Perhaps you can take the comment out of the loop and just describe the
algorithm we're using, and make a note that we have to byteswap first.
"Indeterminate" could be explained briefly as well.
v15-0004 is a stab at that. As an idea, it also renames zero_bytes_le
to zero_byte_low to reflect the effect better. There might be some
other comment edits needed to explain usage, so I plan to hold on to
this for later. Let me know what you think.
[1]: Examples of both in https://www.boost.org/doc/libs/1_84_0/boost/container_hash/detail/hash_mix.hpp
Attachments:
v15-0003-Use-fasthash-for-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v15-0003-Use-fasthash-for-guc_name_hash.patchDownload
From ad25c43c264c5857bf41cbf056ac7d4ab0995b40 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 17:49:22 +0700
Subject: [PATCH v15 3/4] Use fasthash for guc_name_hash
---
src/backend/utils/misc/guc.c | 31 ++++++++++++++++++++++---------
1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8f65ef3d89..e76ab52618 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn_unstable.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1324,22 +1325,34 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const void *key, Size keysize)
{
- uint32 result = 0;
const char *name = *(const char *const *) key;
+ const char *const start = name;
+ fasthash_state hs;
+
+ fasthash_init(&hs, 0);
while (*name)
{
- char ch = *name++;
+ int chunk_len = 0;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ while (chunk_len < FH_SIZEOF_ACCUM && name[chunk_len] != '\0')
+ {
+ hs.accum <<= BITS_PER_BYTE;
+ hs.accum |= name[chunk_len];
+ chunk_len++;
+ }
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
+ /* Quick ASCII-only downcasing */
+ hs.accum |= 0x2020202020202020;
+
+ /* merge into hash and reset for next iteration */
+ fasthash_combine(&hs);
+ hs.accum = 0;
+
+ name += chunk_len;
}
- return result;
+
+ return fasthash_final32(&hs, name - start);
}
/*
--
2.43.0
v15-0004-WIP-comment-edits.patchtext/x-patch; charset=US-ASCII; name=v15-0004-WIP-comment-edits.patchDownload
From e33633ba036ff521482fb24e8984b5865c8515c8 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 15:33:22 +0700
Subject: [PATCH v15 4/4] WIP: comment edits
Clarify detection of zero bytes when hashing aligned C strings
Discussion: https://postgr.es/m/48e8f8bbe0be9c789f98776c7438244ab7a7cc63.camel%40j-davis.com
---
src/include/common/hashfn_unstable.h | 38 ++++++++++++++++------------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 8e829297fd..8c42e876be 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -209,26 +209,33 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
int remainder;
- uint64 zero_bytes_le;
+ uint64 zero_byte_low;
Assert(PointerIsAligned(start, uint64));
+
+ /*
+ * For every chunk of input, check for zero bytes before mixing into the
+ * hash. The chunk with zeros must contain the NUL terminator. We arrange
+ * so that zero_byte_low tells us not only that a zero exists, but also
+ * where it is, so we can hash the remainder of the string.
+ *
+ * The haszero64 calculation will set bits corresponding to the lowest
+ * byte where a zero exists, so that suffices for little-endian machines.
+ * For big-endian machines, we would need bits set for the highest zero
+ * byte in the chunk, since the trailing junk past the terminator could
+ * contain additional zeros. haszero64 does not give us that, so we
+ * byteswap the chunk first.
+ */
for (;;)
{
uint64 chunk = *(uint64 *) str;
- /*
- * With little-endian representation, we can use this calculation,
- * which sets bits in the first byte in the result word that
- * corresponds to a zero byte in the original word. The rest of the
- * bytes are indeterminate, so cannot be used on big-endian machines
- * without either swapping or a bytewise check.
- */
#ifdef WORDS_BIGENDIAN
- zero_bytes_le = haszero64(pg_bswap64(chunk));
+ zero_byte_low = haszero64(pg_bswap64(chunk));
#else
- zero_bytes_le = haszero64(chunk);
+ zero_byte_low = haszero64(chunk);
#endif
- if (zero_bytes_le)
+ if (zero_byte_low)
break;
hs->accum = chunk;
@@ -237,12 +244,11 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
}
/*
- * For the last word, only use bytes up to the NUL for the hash. Bytes
- * with set bits will be 0x80, so calculate the first occurrence of a zero
- * byte within the input word by counting the number of trailing (because
- * little-endian) zeros and dividing the result by 8.
+ * Bytes with set bits will be 0x80, so the number of trailing zeros will
+ * be in the range 7, 15, ..., 63. We turn this into the byte position by
+ * dividing by 8.
*/
- remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+ remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
fasthash_accum(hs, str, remainder);
str += remainder;
--
2.43.0
v15-0001-Initialization-of-incremental-hashing-no-longer-.patchtext/x-patch; charset=US-ASCII; name=v15-0001-Initialization-of-incremental-hashing-no-longer-.patchDownload
From 46e84c4782e2a1291430be4a7d4651de7f387608 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 19:19:14 +0700
Subject: [PATCH v15 1/4] Initialization of incremental hashing no longer uses
length
When the incremental interface was written, care was taken to make
sure the answer always matched upstream when the length was known
ahead of time.
When that is not known ahead of time, callers that create a hash
function using the incremental interface were already advised to
incorporate length into the finalizer once it is known e.g. after
hashing a C string. Experimentation has shown that this also works
well when the length is known ahead of time, so there's no advantage
to having two places to pass the length.
Further, if the length is a compile-time constant in this case,
it can't possibly be needed as a tiebreaker for this caller, so
there's not much point in using it to affect the internal seed upon
initialization.
It's worthwhile that the standalone functions fasthash{32,64} can still
give the same answer as the original namesakes, but it's trivial for
them to reset the seed after initialization. Hence, do that and remove
"len" from fasthash_init, as well as the macro for unknown length.
TODO: comment updates
---
src/backend/catalog/namespace.c | 2 +-
src/include/common/hashfn_unstable.h | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index b610aa6242..8df30b2440 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -256,7 +256,7 @@ spcachekey_hash(SearchPathCacheKey key)
fasthash_state hs;
int sp_len;
- fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0);
+ fasthash_init(&hs, 0);
hs.accum = key.roleid;
fasthash_combine(&hs);
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 3d927e1fb1..8e829297fd 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -89,7 +89,6 @@ typedef struct fasthash_state
#define FH_SIZEOF_ACCUM sizeof(uint64)
-#define FH_UNKNOWN_LENGTH 1
/*
* Initialize the hash state.
@@ -99,10 +98,10 @@ typedef struct fasthash_state
* 'seed' can be zero.
*/
static inline void
-fasthash_init(fasthash_state *hs, int len, uint64 seed)
+fasthash_init(fasthash_state *hs, uint64 seed)
{
memset(hs, 0, sizeof(fasthash_state));
- hs->hash = seed ^ (len * 0x880355f21e6d1965);
+ hs->hash = seed ^ 0x880355f21e6d1965;
}
/* both the finalizer and part of the combining step */
@@ -328,7 +327,8 @@ fasthash64(const char *k, int len, uint64 seed)
{
fasthash_state hs;
- fasthash_init(&hs, len, seed);
+ fasthash_init(&hs, 0);
+ hs.hash = seed ^ (len * 0x880355f21e6d1965);
while (len >= FH_SIZEOF_ACCUM)
{
--
2.43.0
v15-0002-Use-fasthash-for-dynahash-s-default-string-hash.patchtext/x-patch; charset=US-ASCII; name=v15-0002-Use-fasthash-for-dynahash-s-default-string-hash.patchDownload
From 54ef850a0bb909b242ec553b3ea84853611ec233 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 16:04:16 +0700
Subject: [PATCH v15 2/4] Use fasthash for dynahash's default string hash
This avoids strlen calls. string_hash is kept around in case
extensions are using it.
---
src/backend/utils/hash/dynahash.c | 52 +++++++++++++++++++++++++++----
src/common/hashfn.c | 3 +-
2 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a4152080b5..92c7989575 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -307,6 +308,45 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * default_string_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static uint32
+default_string_hash(const void *key, Size keysize)
+{
+ const char *k = (const char *) key;
+ Size s_len = 0;
+ fasthash_state hs;
+
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+
+ fasthash_init(&hs, 0);
+
+ while (*k && s_len < keysize - 1)
+ {
+ int chunk_len = 0;
+
+ while (k[chunk_len] != '\0' &&
+ s_len < keysize - 1 &&
+ chunk_len < FH_SIZEOF_ACCUM)
+ {
+ chunk_len++;
+ s_len++;
+ }
+
+ fasthash_accum(&hs, k, chunk_len);
+ k += chunk_len;
+ }
+
+ return fasthash_final32(&hs, s_len);
+}
+
/************************** CREATE ROUTINES **********************/
@@ -418,8 +458,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
else
{
/*
- * string_hash used to be considered the default hash method, and in a
- * non-assert build it effectively still is. But we now consider it
+ * string_hash used to be considered the default hash method, and
+ * it effectively still was until version 17. Since version 14 we consider it
* an assertion error to not say HASH_STRINGS explicitly. To help
* catch mistaken usage of HASH_STRINGS, we also insist on a
* reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -428,12 +468,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Assert(flags & HASH_STRINGS);
Assert(info->keysize > 8);
- hashp->hash = string_hash;
+ hashp->hash = default_string_hash;
}
/*
* If you don't specify a match function, it defaults to string_compare if
- * you used string_hash, and to memcmp otherwise.
+ * you used default_string_hash, and to memcmp otherwise.
*
* Note: explicitly specifying string_hash is deprecated, because this
* might not work for callers in loadable modules on some platforms due to
@@ -442,7 +482,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
hashp->match = (HashCompareFunc) string_compare;
else
hashp->match = memcmp;
@@ -452,7 +492,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_KEYCOPY)
hashp->keycopy = info->keycopy;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
{
/*
* The signature of keycopy is meant for memcpy(), which returns
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 4db468cf85..3090b3cbd9 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -654,7 +654,8 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed)
/*
* string_hash: hash function for keys that are NUL-terminated strings.
*
- * NOTE: this is the default hash function if none is specified.
+ * NOTE: this was the default string hash for dynahash until version 17,
+ * and is now here only for backward compatibility.
*/
uint32
string_hash(const void *key, Size keysize)
--
2.43.0
On Mon, 2024-01-22 at 09:03 +0700, John Naylor wrote:
v15-0004 is a stab at that. As an idea, it also renames zero_bytes_le
to zero_byte_low to reflect the effect better. There might be some
other comment edits needed to explain usage, so I plan to hold on to
this for later. Let me know what you think.
0004 looks good to me. No urgency so feel free to hold it until a
convenient time.
Regards,
Jeff Davis
On Sun, 21 Jan 2024 at 03:06, Jeff Davis <pgsql@j-davis.com> wrote:
Yes, thank you. I don't think we need to change the algorithm.
Jumping in here at a random point just to share my findings from
poking around this on and off. I am concentrating here on cstring
hashing as that is the most complicated one.
One thing that caught my eye in testing was that the unaligned cstring
code was unexpectedly faster for short strings (3-18B uniform
distribution). Looking into it, the cause was fasthash_accum() called
in the final iteration. In the unaligned case the compiler (clang-15)
unrolled the inner loop, which allowed it to jump directly into the
correct place in the switch. In the aligned case clang decided to
use a data-dependent jump, which then mispredicts all of the time.
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x, measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
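Something like this little-endian sketch of the masking idea
(hypothetical helper, not the actual patch):

/* accumulate the final 1..7 bytes by masking instead of the switch */
static inline void
fasthash_accum_masked(fasthash_state *hs, uint64 chunk, int len)
{
	Assert(len > 0 && len < FH_SIZEOF_ACCUM);
	hs->accum = chunk & (~UINT64CONST(0) >> (64 - len * BITS_PER_BYTE));
	fasthash_combine(hs);
}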
Unaligned case can actually take advantage of the same trick as the
aligned case, it just has to shuffle the data from two consecutive
words before applying the combine function. Patch 2 implements this.
It makes the unaligned case almost as fast as the aligned one, both on
short and long strings. 10% benefit on short strings, 50% on long
ones.
Not sure if the second one is worth the extra code. A different
approach would be to use the simple word at a time hashing for the
unaligned case too and handle word accesses that straddle a page
boundary as a special case. Obviously this only makes sense for
platforms that support unaligned access. On x86 unaligned access
within a cache line is basically free, and across cache lines is only
slightly more expensive. On benchmarks calling the aligned code on
unaligned strings only has a 5% penalty on long strings, short ones
are indistinguishable.
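The special case could be a cheap pointer test, something like this
sketch (assuming 4 kB pages):

/* would an unaligned 8-byte load at 'p' cross a 4 kB page boundary? */
static inline bool
load_crosses_page(const char *p)
{
	return ((uintptr_t) p & 4095) > 4096 - sizeof(uint64);
}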
I also took a look at using SIMD for implementing the hash using the
same aligned access + shuffle trick. The good news is that the
shuffling works well enough that neither it nor checking for string
end are the longest chain. The bad news is that the data load,
alignment, zero finding and masking form a big dependency chain on the
first iteration. Mixing and finalization is even worse, fasthash uses
64bit imul instruction that has a 3 cycle latency, the iteration to
iteration chain is imul + xor, for 4 cycles or 2 B/cycle (in practice
a bit less due to ALU port contention). In SIMD registers there is no
64bit multiply, and 32 bit multiply has a terrible 10 cycle latency on
Intel. AES instructions are an interesting option, but it seems that 2
are needed for good enough mixing, at 4 cycles each, we again end up
at 2B/cycle. Finalization needs another 3 AES instructions, a shuffle
and a xor fold to pass SMHasher, for 17 cycles. The mix latency issue
could be worked around by doing more mixing in parallel, potentially
up to 8x faster, but this does not help short strings at all and would
make the code way bigger. SIMD code does use fewer instructions so it
interleaves better with nearby code that is not dependent on it, not
sure if that matters anywhere.
The short version is that for very long (4k+) strings the attached
SIMD code is 35% faster, for short strings it is 35% slower, and this
is very much x86-64-v3 only and would need a fallback when AVX and
AES-NI are not available. Basically a dead end for the use cases this
hash function is used for.
Regards,
Ants Aasma
Attachments:
0002-Unaligned-fasthash-word-at-a-time-hashing.patchtext/x-patch; charset=US-ASCII; name=0002-Unaligned-fasthash-word-at-a-time-hashing.patchDownload
From 912f46be12536985dda7bcfb669d4ec13e79d073 Mon Sep 17 00:00:00 2001
From: Ants Aasma <ants@cybertec.at>
Date: Mon, 29 Jan 2024 21:07:44 +0200
Subject: [PATCH 2/2] Unaligned fasthash word at a time hashing
About 10% performance benefit on short strings, 50% on long ones,
making the performance almost identical to the aligned case.
---
src/include/common/hashfn_unstable.h | 156 +++++++++++++++++++++++----
1 file changed, 138 insertions(+), 18 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 8ee1b99a204..1e44814d84a 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -189,6 +189,38 @@ first_byte_nonzero(uint64 v)
#endif
}
+/*
+ * Selects first n bits in memory order and masks the rest with NUL.
+ * Using value 0 for n results in undefined behavior.
+ */
+static inline uint64
+first_n64(uint64 v, uint64 n)
+{
+ Assert(0 < n && n <= 64);
+#ifdef WORDS_BIGENDIAN
+ return v & ((~0ULL) << (64 - n));
+#else
+ return v & ((~0ULL) >> (64 - n));
+#endif
+}
+
+/*
+ * Does the equivalent of an unaligned word access into two consecutive
+ * words, taking the last 8 - offset bytes from first and adding first
+ * offset bytes from second word. offset must be in range [1..7]
+ */
+static inline uint64
+align_n64(uint64 a, uint64 b, int offset)
+{
+ Assert(offset > 0 && offset < 8);
+#ifdef WORDS_BIGENDIAN
+ return (a << (offset * 8)) | (b >> (64 - offset * 8));
+#else
+ return (a >> (offset * 8)) | (b << (64 - offset * 8));
+#endif
+}
+
+
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -212,17 +244,15 @@ fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
}
/*
- * specialized workhorse for fasthash_accum_cstring
+ * specialized workhorse for fasthash_accum_cstring_autoaligned
*
- * With an aligned pointer, we consume the string a word at a time.
- * Loading the word containing the NUL terminator cannot segfault since
- * allocation boundaries are suitably aligned.
+ * If the string is aligned we don't need to correct for alignment.
*/
static inline int
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
- int remainder;
+ int remainder_bits;
uint64 zero_bytes_le;
uint64 chunk;
@@ -259,18 +289,111 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
*/
if (first_byte_nonzero(chunk))
{
- remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+ remainder_bits = pg_rightmost_one_pos64(zero_bytes_le);
+ hs->accum = first_n64(chunk, remainder_bits);
+ fasthash_combine(hs);
+
+ str += remainder_bits / BITS_PER_BYTE;
+ }
+
+ return str - start;
+}
+
+/*
+ * specialized implementation for fasthash_accum_cstring
+ *
+ * With an aligned pointer, we consume the string a word at a time.
+ * Loading the word containing the NUL terminator cannot segfault since
+ * allocation boundaries are suitably aligned.
+ *
+ * If the pointer is not aligned we can still load a word at a time, but
+ * have to combine the hashed word from two words using the alignment
+ * offset.
+ */
+static inline int
+fasthash_accum_cstring_autoaligned(fasthash_state *hs, const char *str)
+{
+ const char *const start = str;
+ int remainder_bits;
+ uint64 zero_bytes_le;
+ int offset;
+ uint64 chunk, prev;
+
+ offset = (uintptr_t) str & 7;
+
+ /*
+ * Special case aligned string. Needed to avoid shift by 64 in
+ * alignment code. It's also faster.
+ */
+ if (!offset)
+ return fasthash_accum_cstring_aligned(hs, str);
+
+ /* Not using MAXALIGN here because we need to round down */
+ str = (const char *) ((uintptr_t) str & ~7);
+ chunk = *(uint64 *) str;
+ /* Mask out the preceding bytes with -1 */
+ chunk |= (-1ULL >> (64 - 8*offset));
+ zero_bytes_le = haszero64(chunk);
+ /* String ends in first loaded word. */
+ if (zero_bytes_le) {
+ remainder_bits = pg_rightmost_one_pos64(zero_bytes_le);
+
+ /*
+ * Special case empty string to avoid shift by 64 below and have
+ * the number of combine operations match the unaligned version.
+ **/
+ if (remainder_bits < BITS_PER_BYTE)
+ {
+ return 0;
+ }
+
+ /* Mask out everything past the last byte and align */
+ hs->accum = align_n64(first_n64(chunk, remainder_bits), 0, offset);
+ fasthash_combine(hs);
+ return remainder_bits / BITS_PER_BYTE - offset;
+ }
+
+ prev = chunk;
+ str += FH_SIZEOF_ACCUM;
+ for (;;)
+ {
+ chunk = *(uint64 *) str;
+
+ /*
+ * With little-endian representation, we can use this calculation,
+ * which sets bits in the first byte in the result word that
+ * corresponds to a zero byte in the original word. The rest of the
+ * bytes are indeterminate, so cannot be used on big-endian machines
+ * without either swapping or a bytewise check.
+ */
#ifdef WORDS_BIGENDIAN
- hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
+ zero_bytes_le = haszero64(pg_bswap64(chunk));
#else
- hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+ zero_bytes_le = haszero64(chunk);
#endif
+ if (zero_bytes_le)
+ break;
+
+ hs->accum = align_n64(prev, chunk, offset);
fasthash_combine(hs);
+ str += FH_SIZEOF_ACCUM;
+ prev = chunk;
+ }
+
+ remainder_bits = pg_rightmost_one_pos64(zero_bytes_le);
+ /* Relying on 0 remaining character having 7 zero bits */
+ chunk = first_n64(chunk, remainder_bits);
+
+ hs->accum = align_n64(prev, chunk, offset);
+ fasthash_combine(hs);
- str += remainder;
+ if (remainder_bits / BITS_PER_BYTE > offset)
+ {
+ hs->accum = align_n64(chunk, 0, offset);
+ fasthash_combine(hs);
}
- return str - start;
+ return (str - start) + remainder_bits / BITS_PER_BYTE;
}
/*
@@ -289,19 +412,16 @@ fasthash_accum_cstring(fasthash_state *hs, const char *str)
memcpy(&hs_check, hs, sizeof(fasthash_state));
len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
#endif
- if (PointerIsAligned(str, uint64))
- {
- len = fasthash_accum_cstring_aligned(hs, str);
- Assert(hs_check.hash == hs->hash && len_check == len);
- return len;
- }
-#endif /* SIZEOF_VOID_P */
-
+ len = fasthash_accum_cstring_autoaligned(hs, str);
+ Assert(hs_check.hash == hs->hash && len_check == len);
+ return len;
+#else
/*
* It's not worth it to try to make the word-at-a-time optimization work
* on 32-bit platforms.
*/
return fasthash_accum_cstring_unaligned(hs, str);
+#endif /* SIZEOF_VOID_P */
}
/*
--
2.34.1
0001-Speed-up-last-iteration-of-aligned-fasthash.patchtext/x-patch; charset=US-ASCII; name=0001-Speed-up-last-iteration-of-aligned-fasthash.patchDownload
From b226898acc15c329cf73308ff9d77f0a15f08322 Mon Sep 17 00:00:00 2001
From: Ants Aasma <ants@cybertec.at>
Date: Mon, 29 Jan 2024 15:16:02 +0200
Subject: [PATCH 1/2] Speed up last iteration of aligned fasthash
We know the length of the string so we can mask out end of the
string with a shift. Without this the aligned version was slower
than unaligned on small strings.
---
src/include/common/hashfn_unstable.h | 31 ++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 3d927e1fb18..8ee1b99a204 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -176,6 +176,19 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+/*
+ * Returns non-zero when first byte in memory order is not NUL
+ */
+static inline int
+first_byte_nonzero(uint64 v)
+{
+#ifdef WORDS_BIGENDIAN
+ return v >> 56;
+#else
+ return v & 0xFF;
+#endif
+}
+
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -211,11 +224,12 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
const char *const start = str;
int remainder;
uint64 zero_bytes_le;
+ uint64 chunk;
Assert(PointerIsAligned(start, uint64));
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
/*
* With little-endian representation, we can use this calculation,
@@ -243,9 +257,18 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
* byte within the input word by counting the number of trailing (because
* little-endian) zeros and dividing the result by 8.
*/
- remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (first_byte_nonzero(chunk))
+ {
+ remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+#ifdef WORDS_BIGENDIAN
+ hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
+#else
+ hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+#endif
+ fasthash_combine(hs);
+
+ str += remainder;
+ }
return str - start;
}
--
2.34.1
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x, measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
Interesting work! I've taken this idea and (I'm guessing, haven't
tested) improved it by re-using an intermediate step for the
conditional, simplifying the creation of the mask, and moving the
bitscan out of the longest dependency chain. Since you didn't attach
the test harness, would you like to run this and see how it fares?
(v16-0001 is the same as your 0001, and v16-0002 builds upon it.) I plan
to test myself as well, but since your test tries to model true
latency, I'm more interested in that one.
Not sure if the second one is worth the extra code.
I'd say it's not worth optimizing the case we think won't be taken
anyway. I also like having a simple path to assert against.
Attachments:
v16-0001-Speed-up-last-iteration-of-aligned-fasthash.patchtext/x-patch; charset=US-ASCII; name=v16-0001-Speed-up-last-iteration-of-aligned-fasthash.patchDownload
From be62303df785b80b1bf888b869c5b240a6777af0 Mon Sep 17 00:00:00 2001
From: Ants Aasma <ants@cybertec.at>
Date: Mon, 29 Jan 2024 15:16:02 +0200
Subject: [PATCH v16 1/2] Speed up last iteration of aligned fasthash
We know the length of the string so we can mask out end of the
string with a shift. Without this the aligned version was slower
than unaligned on small strings.
---
src/include/common/hashfn_unstable.h | 31 ++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 3d927e1fb1..8ee1b99a20 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -176,6 +176,19 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+/*
+ * Returns non-zero when first byte in memory order is not NUL
+ */
+static inline int
+first_byte_nonzero(uint64 v)
+{
+#ifdef WORDS_BIGENDIAN
+ return v >> 56;
+#else
+ return v & 0xFF;
+#endif
+}
+
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -211,11 +224,12 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
const char *const start = str;
int remainder;
uint64 zero_bytes_le;
+ uint64 chunk;
Assert(PointerIsAligned(start, uint64));
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
/*
* With little-endian representation, we can use this calculation,
@@ -243,9 +257,18 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
* byte within the input word by counting the number of trailing (because
* little-endian) zeros and dividing the result by 8.
*/
- remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (first_byte_nonzero(chunk))
+ {
+ remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+#ifdef WORDS_BIGENDIAN
+ hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
+#else
+ hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+#endif
+ fasthash_combine(hs);
+
+ str += remainder;
+ }
return str - start;
}
--
2.43.0
v16-0002-Shorten-dependency-chain-for-computing-hash-mask.patchtext/x-patch; charset=US-ASCII; name=v16-0002-Shorten-dependency-chain-for-computing-hash-mask.patchDownload
From a1e1648f3f3a25001c62fffe7dcd422273619e3e Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 30 Jan 2024 16:14:57 +0700
Subject: [PATCH v16 2/2] Shorten dependency chain for computing hash mask
---
src/include/common/hashfn_unstable.h | 30 ++++++++++++----------------
1 file changed, 13 insertions(+), 17 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 8ee1b99a20..0cac3aa380 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -176,19 +176,6 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
-/*
- * Returns non-zero when first byte in memory order is not NUL
- */
-static inline int
-first_byte_nonzero(uint64 v)
-{
-#ifdef WORDS_BIGENDIAN
- return v >> 56;
-#else
- return v & 0xFF;
-#endif
-}
-
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -225,6 +212,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
int remainder;
uint64 zero_bytes_le;
uint64 chunk;
+ uint64 mask;
Assert(PointerIsAligned(start, uint64));
for (;;)
@@ -257,14 +245,22 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
* byte within the input word by counting the number of trailing (because
* little-endian) zeros and dividing the result by 8.
*/
- if (first_byte_nonzero(chunk))
+ /*
+ * Create a mask for the remaining bytes and
+ * combine them into the hash. It would be harmless if the mask also covered the NUL
+ * terminator, except for the case where it is the first byte in the last input read.
+ * In that case, we need to return, so we perform a check for that as we form the mask
+ * for the bytes we need.
+ */
+ mask = zero_bytes_le >> BITS_PER_BYTE;
+ if (mask)
{
remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+ mask |= mask - 1;
#ifdef WORDS_BIGENDIAN
- hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
-#else
- hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+ mask = pg_bswap64(mask);
#endif
+ hs->accum = chunk & mask;
fasthash_combine(hs);
str += remainder;
--
2.43.0
On Tue, 30 Jan 2024 at 12:04, John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x, measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
Interesting work! I've taken this idea and (I'm guessing, haven't
tested) improved it by re-using an intermediate step for the
conditional, simplifying the creation of the mask, and moving the
bitscan out of the longest dependency chain. Since you didn't attach
the test harness, would you like to run this and see how it fares?
(v16-0001 is same as your 0001, and v16-0002 builds upon it.) I plan
to test myself as well, but since your test tries to model true
latency, I'm more interested in that one.
It didn't calculate the same result because the if (mask) condition
was incorrect. Changed it to if (chunk & 0xFF) and removed the right
shift from the mask. It seems to be half a nanosecond faster, but as I
don't have a machine set up for microbenchmarking it's quite close to
measurement noise.
I didn't post the harness as it's currently so messy as to be nearly
useless to others. But if you'd like to play around, I can tidy it up
a bit and post it.
Not sure if the second one is worth the extra code.
I'd say it's not worth optimizing the case we think won't be taken
anyway. I also like having a simple path to assert against.
Agreed.
As an addendum, I couldn't resist trying out using 256bit vectors with
two parallel AES hashes running, unaligned loads with special casing
page boundary straddling loads. Requires -march=x86-64-v3 -maes. About
20% faster than fasthash on short strings, 2.2x faster on 4k strings.
Right now requires 4 bytes alignment (uses vpmaskmovd), but could be
made to work with any alignment.
Regards,
Ants Aasma
On Tue, Jan 30, 2024 at 7:51 PM Ants Aasma <ants.aasma@cybertec.at> wrote:
It didn't calculate the same result because the if (mask) condition
was incorrect. Changed it to if (chunk & 0xFF) and removed the right
shift from the mask.
Yes, you're quite right.
It seems to be half a nanosecond faster, but as I
don't have a machine set up for microbenchmarking it's quite close to
measurement noise.
With my "throughput-ush" test, they look good:
pgbench -n -T 20 -f bench_cstr_aligned.sql -M prepared | grep latency
master:
latency average = 490.722 ms
(Ants Aasma) v17 0001:
latency average = 385.263 ms
v17 0001+0002:
latency average = 339.506 ms
I didn't post the harness as it's currently so messy to be near
useless to others. But if you'd like to play around, I can tidy it up
a bit and post it.
I'd be curious, thanks.
Attachments:
v17-0002-Shorten-dependency-chain-for-computing-hash-mask.patchtext/x-patch; charset=US-ASCII; name=v17-0002-Shorten-dependency-chain-for-computing-hash-mask.patchDownload
From 77d848a83930abe2badd8c0f1ade79c88c27b455 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 30 Jan 2024 16:14:57 +0700
Subject: [PATCH v17 2/2] Shorten dependency chain for computing hash mask
---
src/include/common/hashfn_unstable.h | 30 ++++++++++++----------------
1 file changed, 13 insertions(+), 17 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 8ee1b99a20..3a74e6e33b 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -176,19 +176,6 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
-/*
- * Returns non-zero when first byte in memory order is not NUL
- */
-static inline int
-first_byte_nonzero(uint64 v)
-{
-#ifdef WORDS_BIGENDIAN
- return v >> 56;
-#else
- return v & 0xFF;
-#endif
-}
-
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -225,6 +212,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
int remainder;
uint64 zero_bytes_le;
uint64 chunk;
+ uint64 mask;
Assert(PointerIsAligned(start, uint64));
for (;;)
@@ -257,14 +245,22 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
* byte within the input word by counting the number of trailing (because
* little-endian) zeros and dividing the result by 8.
*/
- if (first_byte_nonzero(chunk))
+ /* If the first byte in the input is the NUL terminator, we have nothing to do */
+ if ((zero_bytes_le & 0xFF) == 0)
{
remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+ /*
+ * Create a mask for the remaining bytes and
+ * combine them into the hash. The mask also covers the NUL
+ * terminator, but that's harmless. The mask could contain 0x80
+ * in higher bytes where the input is non-zero, but only if the
+ * input byte is 0x01, so also harmless.
+ */
+ mask = zero_bytes_le | (zero_bytes_le - 1);
#ifdef WORDS_BIGENDIAN
- hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
-#else
- hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+ mask = pg_bswap64(mask);
#endif
+ hs->accum = chunk & mask;
fasthash_combine(hs);
str += remainder;
--
2.43.0
v17-0001-Speed-up-last-iteration-of-aligned-fasthash.patchtext/x-patch; charset=US-ASCII; name=v17-0001-Speed-up-last-iteration-of-aligned-fasthash.patchDownload
From 5617ee1450316ad4f494c9378ae1c53dbb095b89 Mon Sep 17 00:00:00 2001
From: Ants Aasma <ants@cybertec.at>
Date: Mon, 29 Jan 2024 15:16:02 +0200
Subject: [PATCH v17 1/2] Speed up last iteration of aligned fasthash
We know the length of the string so we can mask out end of the
string with a shift. Without this the aligned version was slower
than unaligned on small strings.
---
src/include/common/hashfn_unstable.h | 31 ++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 3d927e1fb1..8ee1b99a20 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -176,6 +176,19 @@ fasthash_accum(fasthash_state *hs, const char *k, int len)
#define haszero64(v) \
(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
+/*
+ * Returns non-zero when first byte in memory order is not NUL
+ */
+static inline int
+first_byte_nonzero(uint64 v)
+{
+#ifdef WORDS_BIGENDIAN
+ return v >> 56;
+#else
+ return v & 0xFF;
+#endif
+}
+
/*
* all-purpose workhorse for fasthash_accum_cstring
*/
@@ -211,11 +224,12 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
const char *const start = str;
int remainder;
uint64 zero_bytes_le;
+ uint64 chunk;
Assert(PointerIsAligned(start, uint64));
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
/*
* With little-endian representation, we can use this calculation,
@@ -243,9 +257,18 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
* byte within the input word by counting the number of trailing (because
* little-endian) zeros and dividing the result by 8.
*/
- remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (first_byte_nonzero(chunk))
+ {
+ remainder = pg_rightmost_one_pos64(zero_bytes_le) / BITS_PER_BYTE;
+#ifdef WORDS_BIGENDIAN
+ hs->accum = chunk & ((~0ULL) << (64 - BITS_PER_BYTE*remainder));
+#else
+ hs->accum = chunk & ((~0ULL) >> (64 - BITS_PER_BYTE*remainder));
+#endif
+ fasthash_combine(hs);
+
+ str += remainder;
+ }
return str - start;
}
--
2.43.0
I wrote:
It occurred to me that it's strange to have two places that length can
be passed. That was a side effect of the original, which used length
to both know how many bytes to read, and to modify the internal seed.
With the incremental API, it doesn't make sense to pass the length (or
a dummy macro) up front -- with a compile-time fixed length, it can't
possibly break a tie, so it's just noise.
This was a wart, so I pushed a change removing the initial length from
the incremental API.
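With that change, an incremental caller passes the length only at
finalization, along the lines of this sketch (based on the Datum
example quoted later in the thread; for a compile-time fixed length
the final tweak can simply be zero):

fasthash_state hs;

fasthash_init(&hs, kind);        /* seed only; no length up front */
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0); /* fixed length can't break a tie */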
On Mon, Jan 22, 2024 at 11:16 AM Jeff Davis <pgsql@j-davis.com> wrote:
On Mon, 2024-01-22 at 09:03 +0700, John Naylor wrote:
v15-0004 is a stab at that. As an idea, it also renames zero_bytes_le
to zero_byte_low to reflect the effect better. There might be some
other comment edits needed to explain usage, so I plan to hold on to
this for later. Let me know what you think.
0004 looks good to me. No urgency so feel free to hold it until a
convenient time.
Thanks for looking, I pushed this along with an expanded explanation of usage.
0002 and 0003 use fasthash for dynahash and GUC hash, respectively.
These cannot use the existing cstring hashing directly because of
truncation and case-folding, respectively. (Some simplehash uses can,
but that can come later.)
I've re-attached these as well as a cleaned-up version of the tail
optimization. For the CF entry, the GUC hash function in this form
might only be necessary if we went ahead with simplehash. We don't
yet have a new benchmark to show if that's still worthwhile after
867dd2dc87 improved the one upthread.
For dynahash, one tricky part seems to be the comment about the
default and when it was an assertion error. I've tried to reword this,
but maybe needs work. When that's in shape, I'll incorporate removing
other strlen calls.
Attachments:
v18-0001-Use-fasthash-for-dynahash-s-default-string-hash.patchtext/x-patch; charset=US-ASCII; name=v18-0001-Use-fasthash-for-dynahash-s-default-string-hash.patchDownload
From 0acceeb59c44cacb1f8a1a0bdeb7fdef81499155 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 16:04:16 +0700
Subject: [PATCH v18 1/3] Use fasthash for dynahash's default string hash
This avoids strlen calls. string_hash is kept around in case
extensions are using it.
---
src/backend/utils/hash/dynahash.c | 52 +++++++++++++++++++++++++++----
src/common/hashfn.c | 3 +-
2 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index a4152080b5..92c7989575 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -307,6 +308,45 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * default_string_hash: hash function for keys that are NUL-terminated strings.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static uint32
+default_string_hash(const void *key, Size keysize)
+{
+ const char *k = (const char *) key;
+ Size s_len = 0;
+ fasthash_state hs;
+
+ /*
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+
+ fasthash_init(&hs, 0);
+
+ while (*k && s_len < keysize - 1)
+ {
+ int chunk_len = 0;
+
+ while (k[chunk_len] != '\0' &&
+ s_len < keysize - 1 &&
+ chunk_len < FH_SIZEOF_ACCUM)
+ {
+ chunk_len++;
+ s_len++;
+ }
+
+ fasthash_accum(&hs, k, chunk_len);
+ k += chunk_len;
+ }
+
+ return fasthash_final32(&hs, s_len);
+}
+
/************************** CREATE ROUTINES **********************/
@@ -418,8 +458,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
else
{
/*
- * string_hash used to be considered the default hash method, and in a
- * non-assert build it effectively still is. But we now consider it
+ * string_hash used to be considered the default hash method, and
+ * it effectively still was until version 17. Since version 14 we consider it
* an assertion error to not say HASH_STRINGS explicitly. To help
* catch mistaken usage of HASH_STRINGS, we also insist on a
* reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -428,12 +468,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Assert(flags & HASH_STRINGS);
Assert(info->keysize > 8);
- hashp->hash = string_hash;
+ hashp->hash = default_string_hash;
}
/*
* If you don't specify a match function, it defaults to string_compare if
- * you used string_hash, and to memcmp otherwise.
+ * you used default_string_hash, and to memcmp otherwise.
*
* Note: explicitly specifying string_hash is deprecated, because this
* might not work for callers in loadable modules on some platforms due to
@@ -442,7 +482,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
hashp->match = (HashCompareFunc) string_compare;
else
hashp->match = memcmp;
@@ -452,7 +492,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_KEYCOPY)
hashp->keycopy = info->keycopy;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
{
/*
* The signature of keycopy is meant for memcpy(), which returns
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 4db468cf85..3090b3cbd9 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -654,7 +654,8 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed)
/*
* string_hash: hash function for keys that are NUL-terminated strings.
*
- * NOTE: this is the default hash function if none is specified.
+ * NOTE: this was the default string hash for dynahash until version 17,
+ * and is now here only for backward compatibility.
*/
uint32
string_hash(const void *key, Size keysize)
--
2.43.0
v18-0002-Use-fasthash-for-guc_name_hash.patchtext/x-patch; charset=US-ASCII; name=v18-0002-Use-fasthash-for-guc_name_hash.patchDownload
From aa8d1f727b20f65b0506380c318ee823c0657849 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Sun, 21 Jan 2024 17:49:22 +0700
Subject: [PATCH v18 2/3] Use fasthash for guc_name_hash
---
src/backend/utils/misc/guc.c | 36 +++++++++++++++++++++++++++---------
1 file changed, 27 insertions(+), 9 deletions(-)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8f65ef3d89..67859baf69 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_parameter_acl.h"
+#include "common/hashfn_unstable.h"
#include "guc_internal.h"
#include "libpq/pqformat.h"
#include "parser/scansup.h"
@@ -1324,22 +1325,39 @@ guc_name_compare(const char *namea, const char *nameb)
static uint32
guc_name_hash(const void *key, Size keysize)
{
- uint32 result = 0;
const char *name = *(const char *const *) key;
+ const char *const start = name;
+ fasthash_state hs;
+
+ fasthash_init(&hs, 0);
while (*name)
{
- char ch = *name++;
+ int chunk_len = 0;
- /* Case-fold in the same way as guc_name_compare */
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
+ while (chunk_len < FH_SIZEOF_ACCUM && name[chunk_len] != '\0')
+ {
+ hs.accum <<= BITS_PER_BYTE;
+ hs.accum |= name[chunk_len];
+ chunk_len++;
+ }
- /* Merge into hash ... not very bright, but it needn't be */
- result = pg_rotate_left32(result, 5);
- result ^= (uint32) ch;
+ /*
+ * Quick ASCII-only downcasing. Note: This effectively pads spaces
+ * when the input is not a multiple of 8. This would be okay even if
+ * space were a valid name character, since the actual length acts as
+ * a tiebreaker for the finalizer.
+ */
+ hs.accum |= 0x2020202020202020;
+
+ /* merge into hash and reset for next iteration */
+ fasthash_combine(&hs);
+ hs.accum = 0;
+
+ name += chunk_len;
}
- return result;
+
+ return fasthash_final32(&hs, name - start);
}
/*
--
2.43.0
v18-0003-Speed-up-tail-processing-when-hashing-aligned-C-.patchtext/x-patch; charset=US-ASCII; name=v18-0003-Speed-up-tail-processing-when-hashing-aligned-C-.patchDownload
From 88a2bdb6b03dd4bc0e1cc1d289485461d025f51b Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 6 Feb 2024 13:11:33 +0700
Subject: [PATCH v18 3/3] Speed up tail processing when hashing aligned C
strings
After encountering the NUL terminator, the word-at-a-time loop exits
and we must hash the remaining bytes. Previously we calculated the
terminator's position and re-loaded the remaining bytes from the input
string. We already have all the data we need in a register, so lets's
just mask off the bytes we need and hash them immediately. The mask can
be cheaply computed without knowing the terminator's position. We still
need that position for the length calculation, but the CPU can now
do that in parallel with other work, shortening the dependency chain.
Ants Aasma and John Naylor
Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
---
src/include/common/hashfn_unstable.h | 44 +++++++++++++++++++++-------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index af80e65fef..308d1982c8 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -219,8 +219,9 @@ static inline int
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
- int remainder;
+ uint64 chunk;
uint64 zero_byte_low;
+ uint64 mask;
Assert(PointerIsAligned(start, uint64));
@@ -239,7 +240,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
*/
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
#ifdef WORDS_BIGENDIAN
zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -254,14 +255,37 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
str += FH_SIZEOF_ACCUM;
}
- /*
- * The byte corresponding to the NUL will be 0x80, so the rightmost bit
- * position will be in the range 7, 15, ..., 63. Turn this into byte
- * position by dividing by 8.
- */
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (zero_byte_low & 0xFF)
+ {
+ /*
+ * The next byte in the input is the NUL terminator, so we have
+ * nothing to do.
+ */
+ }
+ else
+ {
+ /*
+ * Create a mask for the remaining bytes so we can combine them into
+ * the hash. The mask also covers the NUL terminator, but that's
+ * harmless. The mask could contain 0x80 in bytes corresponding to the
+ * input past the terminator, but only where the input byte is zero or
+ * one, so also harmless.
+ */
+ mask = zero_byte_low | (zero_byte_low - 1);
+#ifdef WORDS_BIGENDIAN
+ /* need to mask the upper bytes */
+ mask = pg_bswap64(mask);
+#endif
+ hs->accum = chunk & mask;
+ fasthash_combine(hs);
+
+ /*
+ * The byte corresponding to the NUL will be 0x80, so the rightmost
+ * bit position will be in the range 15, 23, ..., 63. Turn this into
+ * byte position by dividing by 8.
+ */
+ str += pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+ }
return str - start;
}
--
2.43.0
On 22.01.24 03:03, John Naylor wrote:
I wrote:
fasthash_init(&hs, sizeof(Datum), kind);
fasthash_accum(&hs, (char *) &value, sizeof(Datum));
return fasthash_final32(&hs, 0);
It occurred to me that it's strange to have two places that length can
be passed. That was a side effect of the original, which used length
to both know how many bytes to read, and to modify the internal seed.
With the incremental API, it doesn't make sense to pass the length (or
a dummy macro) up front -- with a compile-time fixed length, it can't
possibly break a tie, so it's just noise.
0001 removes the length from initialization in the incremental
interface. The standalone functions use length directly the same as
before, but after initialization. Thoughts?
Unrelated issue: src/include/common/hashfn_unstable.h currently
causes warnings from cpluspluscheck:
/tmp/cirrus-ci-build/src/include/common/hashfn_unstable.h: In function
‘int fasthash_accum_cstring_unaligned(fasthash_state*, const char*)’:
/tmp/cirrus-ci-build/src/include/common/hashfn_unstable.h:201:20:
warning: comparison of integer expressions of different signedness:
‘int’ and ‘long unsigned int’ [-Wsign-compare]
201 | while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
| ^
and a few more like that.
I think it would be better to declare various int variables and
arguments as size_t instead. Even if you don't actually need the larger
range, it would make it more self-documenting.
On Wed, Feb 7, 2024 at 10:41 PM Peter Eisentraut <peter@eisentraut.org> wrote:
/tmp/cirrus-ci-build/src/include/common/hashfn_unstable.h: In function
‘int fasthash_accum_cstring_unaligned(fasthash_state*, const char*)’:
/tmp/cirrus-ci-build/src/include/common/hashfn_unstable.h:201:20:
warning: comparison of integer expressions of different signedness:
‘int’ and ‘long unsigned int’ [-Wsign-compare]
201 | while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
| ^
and a few more like that.
I think it would be better to declare various int variables and
arguments as size_t instead. Even if you don't actually need the larger
range, it would make it more self-documenting.
Thanks for the report! I can reproduce and have pushed that change.
On Tue, Jan 30, 2024 at 5:04 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x, measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).Interesting work! I've taken this idea and (I'm guessing, haven't
tested) improved it by re-using an intermediate step for the
conditional, simplifying the creation of the mask, and moving the
bitscan out of the longest dependency chain.
This needed a rebase, and is now 0001. I plan to push this soon.
I also went and looked at the simplehash instances and found a few
that would be easy to switch over. Rather than try to figure out which
could benefit from shaving cycles, I changed all the string hashes,
and one more, in 0002 so they can act as examples.
0003 uses fasthash for resowner, as suggested by Heikki upthread. Now
murmur64 has no callers, but it (or similar *) could be used in
pg_dump/common.c for hashing CatalogId (8 bytes).
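For the CatalogId case, that might look something like this (a sketch
only; it swaps in fasthash32 rather than murmur64, consistent with the
resowner patch below, and assumes CatalogId is the usual 8-byte pair
of OIDs):

/* sketch: hash an 8-byte CatalogId, analogous to hash_resource_elem */
static uint32
hash_catalog_id(CatalogId catId)
{
    return fasthash32((const char *) &catId, sizeof(CatalogId), 0);
}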
Commit 42a1de3013 added a new use for string_hash, but I can't tell
from a quick glance whether it uses the truncation, so I'm going to
take a closer look before re-attaching the proposed dynahash change
again.
* some examples here:
https://www.boost.org/doc/libs/1_84_0/boost/container_hash/detail/hash_mix.hpp
Attachments:
v19-0001-Speed-up-tail-processing-when-hashing-aligned-C-.patchtext/x-patch; charset=US-ASCII; name=v19-0001-Speed-up-tail-processing-when-hashing-aligned-C-.patchDownload
From 63d8140f146b58ea044f3516ae5472febd6d1caf Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 6 Feb 2024 13:11:33 +0700
Subject: [PATCH v19 1/3] Speed up tail processing when hashing aligned C
strings
After encountering the NUL terminator, the word-at-a-time loop exits
and we must hash the remaining bytes. Previously we calculated the
terminator's position and re-loaded the remaining bytes from the input
string. We already have all the data we need in a register, so let's
just mask off the bytes we need and hash them immediately. The mask can
be cheaply computed without knowing the terminator's position. We still
need that position for the length calculation, but the CPU can now
do that in parallel with other work, shortening the dependency chain.
Ants Aasma and John Naylor
Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
---
src/include/common/hashfn_unstable.h | 44 +++++++++++++++++++++-------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 791750d136..bd7323fe05 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -219,8 +219,9 @@ static inline size_t
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
- size_t remainder;
+ uint64 chunk;
uint64 zero_byte_low;
+ uint64 mask;
Assert(PointerIsAligned(start, uint64));
@@ -239,7 +240,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
*/
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
#ifdef WORDS_BIGENDIAN
zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -254,14 +255,37 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
str += FH_SIZEOF_ACCUM;
}
- /*
- * The byte corresponding to the NUL will be 0x80, so the rightmost bit
- * position will be in the range 7, 15, ..., 63. Turn this into byte
- * position by dividing by 8.
- */
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (zero_byte_low & 0xFF)
+ {
+ /*
+ * The next byte in the input is the NUL terminator, so we have
+ * nothing to do.
+ */
+ }
+ else
+ {
+ /*
+ * Create a mask for the remaining bytes so we can combine them into
+ * the hash. The mask also covers the NUL terminator, but that's
+ * harmless. The mask could contain 0x80 in bytes corresponding to the
+ * input past the terminator, but only where the input byte is zero or
+ * one, so also harmless.
+ */
+ mask = zero_byte_low | (zero_byte_low - 1);
+#ifdef WORDS_BIGENDIAN
+ /* need to mask the upper bytes */
+ mask = pg_bswap64(mask);
+#endif
+ hs->accum = chunk & mask;
+ fasthash_combine(hs);
+
+ /*
+ * The byte corresponding to the NUL will be 0x80, so the rightmost
+ * bit position will be in the range 15, 23, ..., 63. Turn this into
+ * byte position by dividing by 8.
+ */
+ str += pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+ }
return str - start;
}
--
2.43.0
v19-0003-Use-fasthash-for-hash_resource_elem.patchtext/x-patch; charset=US-ASCII; name=v19-0003-Use-fasthash-for-hash_resource_elem.patchDownload
From 5dad8c783bc5d0d5f573ea136b13e79aa22d0371 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 5 Mar 2024 17:22:16 +0700
Subject: [PATCH v19 3/3] Use fasthash for hash_resource_elem
---
src/backend/utils/resowner/resowner.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c
index ab9343bc5c..ea47c6cade 100644
--- a/src/backend/utils/resowner/resowner.c
+++ b/src/backend/utils/resowner/resowner.c
@@ -45,7 +45,7 @@
*/
#include "postgres.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/int.h"
#include "storage/ipc.h"
#include "storage/predicate.h"
@@ -220,14 +220,9 @@ hash_resource_elem(Datum value, const ResourceOwnerDesc *kind)
* the hash too, otherwise those resources will collide a lot. But
* because there are only a few resource kinds like that - and only a few
* resource kinds to begin with - we don't need to work too hard to mix
- * 'kind' into the hash. Just add it with hash_combine(), it perturbs the
- * result enough for our purposes.
+ * 'kind' into the hash. Just use it as the seed for fasthash.
*/
-#if SIZEOF_DATUM == 8
- return hash_combine64(murmurhash64((uint64) value), (uint64) kind);
-#else
- return hash_combine(murmurhash32((uint32) value), (uint32) kind);
-#endif
+ return fasthash32((char *) &value, sizeof(value), (uint64) kind);
}
/*
--
2.43.0
v19-0002-Convert-simplehash-hash-functions-on-strings-to-.patchtext/x-patch; charset=US-ASCII; name=v19-0002-Convert-simplehash-hash-functions-on-strings-to-.patchDownload
From 3f87dc06e40dab708a70fe1a96cf9d909b6652cb Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 5 Mar 2024 16:59:39 +0700
Subject: [PATCH v19 2/3] Convert simplehash hash functions on strings to
fasthash
---
src/bin/pg_combinebackup/load_manifest.c | 12 +++++++++---
src/bin/pg_dump/pg_dumpall.c | 12 +++++++++---
src/bin/pg_rewind/filemap.c | 12 +++++++++---
src/bin/pg_verifybackup/pg_verifybackup.c | 12 +++++++++---
src/common/blkreftable.c | 4 ++--
5 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c
index 2b8e74fcf3..514e657ddc 100644
--- a/src/bin/pg_combinebackup/load_manifest.c
+++ b/src/bin/pg_combinebackup/load_manifest.c
@@ -15,7 +15,7 @@
#include <sys/stat.h>
#include <unistd.h>
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "load_manifest.h"
@@ -239,7 +239,13 @@ combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
static uint32
hash_string_pointer(char *s)
{
- unsigned char *ss = (unsigned char *) s;
+ fasthash_state hs;
+ size_t s_len;
- return hash_bytes(ss, strlen(s));
+ fasthash_init(&hs, 0);
+
+ /* Hash string and save the length for tweaking the final mix. */
+ s_len = fasthash_accum_cstring(&hs, s);
+
+ return fasthash_final32(&hs, s_len);
}
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 491311fe79..00ad33d803 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -21,7 +21,7 @@
#include "catalog/pg_authid_d.h"
#include "common/connect.h"
#include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/string.h"
#include "dumputils.h"
@@ -1941,9 +1941,15 @@ dumpTimestamp(const char *msg)
static uint32
hash_string_pointer(char *s)
{
- unsigned char *ss = (unsigned char *) s;
+ fasthash_state hs;
+ size_t s_len;
- return hash_bytes(ss, strlen(s));
+ fasthash_init(&hs, 0);
+
+ /* Hash string and save the length for tweaking the final mix. */
+ s_len = fasthash_accum_cstring(&hs, s);
+
+ return fasthash_final32(&hs, s_len);
}
/*
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 255ddf2ffa..fbc8df27ba 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -28,7 +28,7 @@
#include "catalog/pg_tablespace_d.h"
#include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/string.h"
#include "datapagemap.h"
#include "filemap.h"
@@ -829,7 +829,13 @@ decide_file_actions(void)
static uint32
hash_string_pointer(const char *s)
{
- unsigned char *ss = (unsigned char *) s;
+ fasthash_state hs;
+ size_t s_len;
- return hash_bytes(ss, strlen(s));
+ fasthash_init(&hs, 0);
+
+ /* Hash string and save the length for tweaking the final mix. */
+ s_len = fasthash_accum_cstring(&hs, s);
+
+ return fasthash_final32(&hs, s_len);
}
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index ae8c18f373..40c48a2e54 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -18,7 +18,7 @@
#include <sys/stat.h>
#include <time.h>
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "fe_utils/simple_list.h"
@@ -923,9 +923,15 @@ should_ignore_relpath(verifier_context *context, char *relpath)
static uint32
hash_string_pointer(char *s)
{
- unsigned char *ss = (unsigned char *) s;
+ fasthash_state hs;
+ size_t s_len;
- return hash_bytes(ss, strlen(s));
+ fasthash_init(&hs, 0);
+
+ /* Hash string and save the length for tweaking the final mix. */
+ s_len = fasthash_accum_cstring(&hs, s);
+
+ return fasthash_final32(&hs, s_len);
}
/*
diff --git a/src/common/blkreftable.c b/src/common/blkreftable.c
index bfa6f7ab5d..980f44c9df 100644
--- a/src/common/blkreftable.c
+++ b/src/common/blkreftable.c
@@ -37,7 +37,7 @@
#endif
#include "common/blkreftable.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_crc32c.h"
/*
@@ -124,7 +124,7 @@ struct BlockRefTableEntry
#define SH_KEY_TYPE BlockRefTableKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
- hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey))
+ fasthash32((const char *) &key, sizeof(BlockRefTableKey), 0)
#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0)
#define SH_SCOPE static inline
#ifdef FRONTEND
--
2.43.0
On Tue, Mar 5, 2024 at 5:30 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 5:04 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x, measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
Interesting work! I've taken this idea and (I'm guessing, haven't
tested) improved it by re-using an intermediate step for the
conditional, simplifying the creation of the mask, and moving the
bitscan out of the longest dependency chain.
This needed a rebase, and is now 0001. I plan to push this soon.
I held off on this because CI was failing, but it wasn't because of this.
I also went and looked at the simplehash instances and found a few
that would be easy to switch over. Rather than try to figure out which
could benefit from shaving cycles, I changed all the string hashes,
and one more, in 0002 so they can act as examples.
This was the culprit. The search path cache didn't trigger this when
it went in, but it seems that in frontend code a read past the end of
a malloc'd allocation fails under -fsanitize=address. By the same
token, I'm guessing the only reason this didn't fail for the backend
is that almost all strings you'd want to use as a hash key won't use a
malloc'd external block.
I found that adding __attribute__((no_sanitize_address)) to
fasthash_accum_cstring_aligned() passes CI. While this kind of
exception is warned against (for good reason), I think it's fine here
given that glibc and NetBSD, and probably others, do something similar
for optimized strlen(). Before I write the proper macro for that, are
there any objections? Better ideas?
Commit 42a1de3013 added a new use for string_hash, but I can't tell
from a quick glance whether it uses the truncation, so I'm going to
take a closer look before re-attaching the proposed dynahash change
again.
After looking, I think the thing to do here is create a
hashfn_unstable.c file for global functions:
- hash_string() to replace all those duplicate definitions of
hash_string_pointer() in all the frontend code
- hash_string_with_limit() for dynahash and dshash (sketched below).
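A minimal sketch of the second one, mirroring the default_string_hash
logic upthread (the name and exact signature are this thread's
proposal, not committed code):

uint32
hash_string_with_limit(const char *key, size_t limit)
{
    const char *k = key;
    size_t      s_len = 0;
    fasthash_state hs;

    fasthash_init(&hs, 0);

    /* hash at most limit bytes, one accum-sized chunk at a time */
    while (*k && s_len < limit)
    {
        size_t  chunk_len = 0;

        while (k[chunk_len] != '\0' &&
               s_len < limit &&
               chunk_len < FH_SIZEOF_ACCUM)
        {
            chunk_len++;
            s_len++;
        }

        fasthash_accum(&hs, k, chunk_len);
        k += chunk_len;
    }

    return fasthash_final32(&hs, s_len);
}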
On Wed, 2024-03-20 at 14:26 +0700, John Naylor wrote:
This was the culprit. The search path cache didn't trigger this when
it went in, but it seems for frontend a read past the end of malloc
fails -fsantize=address. By the same token, I'm guessing the only
reason this didn't fail for backend is because almost all strings
you'd want to use as a hash key won't use a malloc'd external block.I found that adding __attribute__((no_sanitize_address)) to
fasthash_accum_cstring_aligned() passes CI. While this kind of
exception is warned against (for good reason), I think it's fine here
given that glibc and NetBSD, and probably others, do something
similar
for optimized strlen(). Before I write the proper macro for that, are
there any objections? Better ideas?
It appears that the spelling no_sanitize_address is deprecated in
clang[1] in favor of 'no_sanitize("address")'. It doesn't appear to be
deprecated in gcc[2].
Aside from that, +1.
Regards,
Jeff Davis
[1]: https://clang.llvm.org/docs/AddressSanitizer.html#disabling-instrumentation-with-attribute-no-sanitize-address
[2]: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
On Wed, Mar 20, 2024 at 11:01 PM Jeff Davis <pgsql@j-davis.com> wrote:
I found that adding __attribute__((no_sanitize_address)) to
fasthash_accum_cstring_aligned() passes CI. While this kind of
exception is warned against (for good reason), I think it's fine here
given that glibc and NetBSD, and probably others, do something
similar
for optimized strlen(). Before I write the proper macro for that, are
there any objections? Better ideas?It appears that the spelling no_sanitize_address is deprecated in
clang[1] in favor of 'no_sanitize("address")'. It doesn't appear to be
deprecated in gcc[2].
Thanks for the pointers! In v20-0001, I've drafted checking this
spelling first, since pg_attribute_no_sanitize_alignment has a similar
version check. Then it checks for no_sanitize_address using
__has_attribute, which goes back to gcc 5. That's plenty for the
buildfarm and CI, and I'm not sure it's worth expending additional
effort to cover more cases. (A similar attribute exists for MSVC in
case it comes up.)
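For reference, the draft described above might look roughly like this
(a sketch; the macro name here is made up, and the real version checks
may differ):

/*
 * Sketch: prefer clang's no_sanitize("address") spelling, fall back
 * to no_sanitize_address (gcc 5+), else expand to nothing.
 */
#if defined(__has_attribute)
#if __has_attribute(no_sanitize)
#define pg_attr_no_asan __attribute__((no_sanitize("address")))
#elif __has_attribute(no_sanitize_address)
#define pg_attr_no_asan __attribute__((no_sanitize_address))
#endif
#endif
#ifndef pg_attr_no_asan
#define pg_attr_no_asan
#endif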
v20-0003 adds a new file hashfn_unstable.c for convenience functions
and converts all the duplicate frontend uses of hash_string_pointer.
This will be where a similar hash_string_with_len will live for
dynahash/dshash, which I tested some time ago. I haven't decided whether
to merge that earlier work here or keep it in a separate patch, but
regardless of how 0003 ends up I'd like to push 0001/0002 shortly.
Attachments:
v20-0002-Speed-up-tail-processing-when-hashing-aligned-C-.patchtext/x-patch; charset=US-ASCII; name=v20-0002-Speed-up-tail-processing-when-hashing-aligned-C-.patchDownload
From 690061ff4a54e7baef213bb16e7cc4c4f4c79dbd Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 6 Feb 2024 13:11:33 +0700
Subject: [PATCH v20 2/3] Speed up tail processing when hashing aligned C
strings
After encountering the NUL terminator, the word-at-a-time loop exits
and we must hash the remaining bytes. Previously we calculated the
terminator's position and re-loaded the remaining bytes from the input
string. We already have all the data we need in a register, so let's
just mask off the bytes we need and hash them immediately. The mask can
be cheaply computed without knowing the terminator's position. We still
need that position for the length calculation, but the CPU can now
do that in parallel with other work, shortening the dependency chain.
Ants Aasma and John Naylor
Discussion: https://postgr.es/m/CANwKhkP7pCiW_5fAswLhs71-JKGEz1c1%2BPC0a_w1fwY4iGMqUA%40mail.gmail.com
---
src/include/common/hashfn_unstable.h | 44 +++++++++++++++++++++-------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 4227afd141..8998475ccf 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -222,8 +222,9 @@ static inline size_t
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
- size_t remainder;
+ uint64 chunk;
uint64 zero_byte_low;
+ uint64 mask;
Assert(PointerIsAligned(start, uint64));
@@ -242,7 +243,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
*/
for (;;)
{
- uint64 chunk = *(uint64 *) str;
+ chunk = *(uint64 *) str;
#ifdef WORDS_BIGENDIAN
zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -257,14 +258,37 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
str += FH_SIZEOF_ACCUM;
}
- /*
- * The byte corresponding to the NUL will be 0x80, so the rightmost bit
- * position will be in the range 7, 15, ..., 63. Turn this into byte
- * position by dividing by 8.
- */
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
- str += remainder;
+ if (zero_byte_low & 0xFF)
+ {
+ /*
+ * The next byte in the input is the NUL terminator, so we have
+ * nothing to do.
+ */
+ }
+ else
+ {
+ /*
+ * Create a mask for the remaining bytes so we can combine them into
+ * the hash. The mask also covers the NUL terminator, but that's
+ * harmless. The mask could contain 0x80 in bytes corresponding to the
+ * input past the terminator, but only where the input byte is zero or
+ * one, so also harmless.
+ */
+ mask = zero_byte_low | (zero_byte_low - 1);
+#ifdef WORDS_BIGENDIAN
+ /* need to mask the upper bytes */
+ mask = pg_bswap64(mask);
+#endif
+ hs->accum = chunk & mask;
+ fasthash_combine(hs);
+
+ /*
+ * The byte corresponding to the NUL will be 0x80, so the rightmost
+ * bit position will be in the range 15, 23, ..., 63. Turn this into
+ * byte position by dividing by 8.
+ */
+ str += pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+ }
return str - start;
}
--
2.44.0
v20-0003-Convert-some-frontend-hash-functions-to-fasthash.patchtext/x-patch; charset=US-ASCII; name=v20-0003-Convert-some-frontend-hash-functions-to-fasthash.patchDownload
From 97c2996c4e081c5612c06da6c80ae87a1d273090 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 5 Mar 2024 16:59:39 +0700
Subject: [PATCH v20 3/3] Convert some frontend hash functions to fasthash
Also go one step further and remove duplication of
function definitions by creating a new function,
hash_string(), in a new file hashfn_unstable.c.
It's not clear how many of these are performance-
sensitive enough to benefit from removing strlen()
calls, but the simplification is worth it on its
own.
WIP: hash_string_with_len() for dynahash/dshash
---
src/bin/pg_combinebackup/load_manifest.c | 16 ++---------
src/bin/pg_dump/pg_dumpall.c | 17 ++----------
src/bin/pg_rewind/filemap.c | 17 ++----------
src/bin/pg_verifybackup/pg_verifybackup.c | 16 ++---------
src/common/Makefile | 1 +
src/common/blkreftable.c | 4 +--
src/common/hashfn_unstable.c | 34 +++++++++++++++++++++++
src/common/meson.build | 1 +
src/include/common/hashfn_unstable.h | 3 ++
9 files changed, 49 insertions(+), 60 deletions(-)
create mode 100644 src/common/hashfn_unstable.c
diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c
index 7bc10fbe10..d9e7f9e1b4 100644
--- a/src/bin/pg_combinebackup/load_manifest.c
+++ b/src/bin/pg_combinebackup/load_manifest.c
@@ -15,7 +15,7 @@
#include <sys/stat.h>
#include <unistd.h>
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "load_manifest.h"
@@ -38,12 +38,11 @@
* Define a hash table which we can use to store information about the files
* mentioned in the backup manifest.
*/
-static uint32 hash_string_pointer(char *s);
#define SH_PREFIX manifest_files
#define SH_ELEMENT_TYPE manifest_file
#define SH_KEY_TYPE char *
#define SH_KEY pathname
-#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_HASH_KEY(tb, key) hash_string(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE extern
#define SH_RAW_ALLOCATOR pg_malloc0
@@ -263,14 +262,3 @@ combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
manifest->last_wal_range->next = range;
manifest->last_wal_range = range;
}
-
-/*
- * Helper function for manifest_files hash table.
- */
-static uint32
-hash_string_pointer(char *s)
-{
- unsigned char *ss = (unsigned char *) s;
-
- return hash_bytes(ss, strlen(s));
-}
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 046c0dc3b3..73337f3392 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -21,7 +21,7 @@
#include "catalog/pg_authid_d.h"
#include "common/connect.h"
#include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/string.h"
#include "dumputils.h"
@@ -33,8 +33,6 @@
/* version string we expect back from pg_dump */
#define PGDUMP_VERSIONSTR "pg_dump (PostgreSQL) " PG_VERSION "\n"
-static uint32 hash_string_pointer(char *s);
-
typedef struct
{
uint32 status;
@@ -46,7 +44,7 @@ typedef struct
#define SH_ELEMENT_TYPE RoleNameEntry
#define SH_KEY_TYPE char *
#define SH_KEY rolename
-#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_HASH_KEY(tb, key) hash_string(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) (a)->hashval
@@ -1938,17 +1936,6 @@ dumpTimestamp(const char *msg)
fprintf(OPF, "-- %s %s\n\n", msg, buf);
}
-/*
- * Helper function for rolename_hash hash table.
- */
-static uint32
-hash_string_pointer(char *s)
-{
- unsigned char *ss = (unsigned char *) s;
-
- return hash_bytes(ss, strlen(s));
-}
-
/*
* read_dumpall_filters - retrieve database identifier patterns from file
*
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 255ddf2ffa..4458324c9d 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -28,7 +28,7 @@
#include "catalog/pg_tablespace_d.h"
#include "common/file_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/string.h"
#include "datapagemap.h"
#include "filemap.h"
@@ -38,12 +38,11 @@
* Define a hash table which we can use to store information about the files
* appearing in source and target systems.
*/
-static uint32 hash_string_pointer(const char *s);
#define SH_PREFIX filehash
#define SH_ELEMENT_TYPE file_entry_t
#define SH_KEY_TYPE const char *
#define SH_KEY path
-#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_HASH_KEY(tb, key) hash_string(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_RAW_ALLOCATOR pg_malloc0
@@ -821,15 +820,3 @@ decide_file_actions(void)
return filemap;
}
-
-
-/*
- * Helper function for filemap hash table.
- */
-static uint32
-hash_string_pointer(const char *s)
-{
- unsigned char *ss = (unsigned char *) s;
-
- return hash_bytes(ss, strlen(s));
-}
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index 0e9b59f2a8..229c642633 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -19,7 +19,7 @@
#include <time.h>
#include "common/controldata_utils.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "fe_utils/simple_list.h"
@@ -68,12 +68,11 @@ typedef struct manifest_file
* Define a hash table which we can use to store information about the files
* mentioned in the backup manifest.
*/
-static uint32 hash_string_pointer(char *s);
#define SH_PREFIX manifest_files
#define SH_ELEMENT_TYPE manifest_file
#define SH_KEY_TYPE char *
#define SH_KEY pathname
-#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
+#define SH_HASH_KEY(tb, key) hash_string(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_RAW_ALLOCATOR pg_malloc0
@@ -986,17 +985,6 @@ should_ignore_relpath(verifier_context *context, char *relpath)
return false;
}
-/*
- * Helper function for manifest_files hash table.
- */
-static uint32
-hash_string_pointer(char *s)
-{
- unsigned char *ss = (unsigned char *) s;
-
- return hash_bytes(ss, strlen(s));
-}
-
/*
* Print a progress report based on the global variables.
*
diff --git a/src/common/Makefile b/src/common/Makefile
index 3d83299432..43a53e6d9d 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -59,6 +59,7 @@ OBJS_COMMON = \
file_perm.o \
file_utils.o \
hashfn.o \
+ hashfn_unstable.o \
ip.o \
jsonapi.o \
keywords.o \
diff --git a/src/common/blkreftable.c b/src/common/blkreftable.c
index bfa6f7ab5d..980f44c9df 100644
--- a/src/common/blkreftable.c
+++ b/src/common/blkreftable.c
@@ -37,7 +37,7 @@
#endif
#include "common/blkreftable.h"
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_crc32c.h"
/*
@@ -124,7 +124,7 @@ struct BlockRefTableEntry
#define SH_KEY_TYPE BlockRefTableKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
- hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey))
+ fasthash32((const char *) &key, sizeof(BlockRefTableKey), 0)
#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0)
#define SH_SCOPE static inline
#ifdef FRONTEND
diff --git a/src/common/hashfn_unstable.c b/src/common/hashfn_unstable.c
new file mode 100644
index 0000000000..8a2fbd0c3e
--- /dev/null
+++ b/src/common/hashfn_unstable.c
@@ -0,0 +1,34 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashfn_unstable.c
+ * Convenience hashing functions based on hashfn_unstable.h.
+ * As described in that header, they must not be used in indexes
+ * or other on-disk structures.
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/common/hashfn_unstable.c
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "common/hashfn_unstable.h"
+
+
+uint32
+hash_string(const char *s)
+{
+ fasthash_state hs;
+ size_t s_len;
+
+ fasthash_init(&hs, 0);
+
+ /* Hash string and save the length for tweaking the final mix. */
+ s_len = fasthash_accum_cstring(&hs, s);
+
+ return fasthash_final32(&hs, s_len);
+}
diff --git a/src/common/meson.build b/src/common/meson.build
index de68e408fa..f5bf755b89 100644
--- a/src/common/meson.build
+++ b/src/common/meson.build
@@ -13,6 +13,7 @@ common_sources = files(
'file_perm.c',
'file_utils.c',
'hashfn.c',
+ 'hashfn_unstable.c',
'ip.c',
'jsonapi.c',
'keywords.c',
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 8998475ccf..3b1e6bf2b5 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -394,4 +394,7 @@ fasthash32(const char *k, size_t len, uint64 seed)
return fasthash_reduce32(fasthash64(k, len, seed));
}
+
+extern uint32 hash_string(const char *s);
+
#endif /* HASHFN_UNSTABLE_H */
--
2.44.0
v20-0001-Add-macro-to-disable-address-safety-instrumentat.patchtext/x-patch; charset=US-ASCII; name=v20-0001-Add-macro-to-disable-address-safety-instrumentat.patchDownload
From 29fbfedd806030a14d817132d29d6755e32daec7 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Fri, 22 Mar 2024 13:01:33 +0700
Subject: [PATCH v20 1/3] Add macro to disable address safety instrumentation
fasthash_accum_cstring_aligned() uses a technique, found in various
strlen() implementations, to detect a string's NUL terminator by
reading a word at a time. That triggers failures when testing with
"-fsanitize=address", at least with frontend code. To enable using
this function anywhere, add a function attribute macro to disable
such testing.
Reviewed by
Discussion: https://postgr.es/m/CANWCAZbwvp7oUEkbw-xP4L0_S_WNKq-J-ucP4RCNDPJnrakUPw%40mail.gmail.com
---
src/include/c.h | 13 +++++++++++++
src/include/common/hashfn_unstable.h | 5 ++++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/src/include/c.h b/src/include/c.h
index cf37e02fe1..dc1841346c 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -135,6 +135,19 @@
#define pg_nodiscard
#endif
+/*
+ * This macro will disable address safety instrumentation for a function
+ * when running with "-fsanitize=address". Think twice before using this!
+ */
+#if defined(__clang__) || __GNUC__ >= 8
+#define pg_attribute_no_sanitize_address() __attribute__((no_sanitize("address")))
+#elif __has_attribute(no_sanitize_address)
+/* This would work for clang, but it's deprecated. */
+#define pg_attribute_no_sanitize_address() __attribute__((no_sanitize_address))
+#else
+#define pg_attribute_no_sanitize_address()
+#endif
+
/*
* Place this macro before functions that should be allowed to make misaligned
* accesses. Think twice before using it on non-x86-specific code!
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 791750d136..4227afd141 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -213,8 +213,11 @@ fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
*
* With an aligned pointer, we consume the string a word at a time.
* Loading the word containing the NUL terminator cannot segfault since
- * allocation boundaries are suitably aligned.
+ * allocation boundaries are suitably aligned. To keep from setting
+ * off alarms with address sanitizers, exclude this function from
+ * such testing.
*/
+pg_attribute_no_sanitize_address()
static inline size_t
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
--
2.44.0
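To illustrate what the attribute buys, here is a standalone sketch (the
names and harness are illustrative, not PostgreSQL code) of the
word-at-a-time idiom. Built with -fsanitize=address, the 8-byte load that
reaches past the 4-byte allocation is reported unless the function carries
the attribute:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 8)
#define demo_no_asan __attribute__((no_sanitize("address")))
#else
#define demo_no_asan
#endif

demo_no_asan
static size_t
word_strlen(const char *s)
{
    const char *p = s;

    /* Word loads may read past the allocation but never past an 8-byte
     * boundary, so they cannot fault; ASan flags them anyway unless the
     * attribute above disables instrumentation for this function. */
    for (;; p += 8)
    {
        uint64_t    chunk = *(const uint64_t *) p;

        if ((chunk - 0x0101010101010101ULL) & ~chunk & 0x8080808080808080ULL)
            break;              /* this word contains the NUL */
    }
    while (*p)
        p++;
    return (size_t) (p - s);
}

int
main(void)
{
    char       *s = malloc(4);  /* malloc alignment suffices for the load */

    strcpy(s, "abc");
    printf("%zu\n", word_strlen(s));    /* 3 */
    free(s);
    return 0;
}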
On Wed, 2024-03-27 at 13:44 +0700, John Naylor wrote:
Thanks for the pointers! In v20-0001, I've drafted checking this
spelling first, since pg_attribute_no_sanitize_alignment has a
similar
version check. Then it checks for no_sanitize_address using
__has_attribute, which goes back to gcc 5. That's plenty for the
buildfarm and CI, and I'm not sure it's worth expending additional
effort to cover more cases. (A similar attribute exists for MSVC in
case it comes up.)
0001 looks good to me, thank you.
v21-0003 adds a new file hashfn_unstable.c for convenience functions
and converts all the duplicate frontend uses of hash_string_pointer.
Why not make hash_string() inline, too? I'm fine with it either way,
I'm just curious why you went to the trouble to create a new .c file so
it didn't have to be inlined.
Regards,
Jeff Davis
On Thu, Mar 28, 2024 at 12:37 PM Jeff Davis <pgsql@j-davis.com> wrote:
v21-0003 adds a new file hashfn_unstable.c for convenience functions
and converts all the duplicate frontend uses of hash_string_pointer.
Why not make hash_string() inline, too? I'm fine with it either way,
I'm just curious why you went to the trouble to create a new .c file so
it didn't have to be inlined.
Yeah, it's a bit strange looking in isolation, and I'm not sure I'll
go that route. When I was thinking of this, I also had dynahash and
dshash in mind, which do indirect calls, even if the function is
defined in the same file. That would still work with an inline
definition in the header, just duplicated in the different translation
units. Maybe that's not worth worrying about, since I imagine use
cases with indirect calls will remain rare.
On Tue, Mar 5, 2024 at 5:30 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 5:04 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x. Measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
Interesting work! I've taken this idea and (I'm guessing, haven't
tested) improved it by re-using an intermediate step for the
conditional, simplifying the creation of the mask, and moving the
bitscan out of the longest dependency chain.
This needed a rebase, and is now 0001. I plan to push this soon.
I pushed but had to revert -- my version (and I believe both) failed
to keep the invariant that the aligned and unaligned must result in
the same hash. It's clear to me how to fix, but I've injured my strong
hand and won't be typing much for a couple of days. I'll prioritize
the removal of strlen calls for v17, since the optimization can wait
and there is also a valgrind issue I haven't looked into.
On Tue, Jan 30, 2024 at 4:13 AM Ants Aasma <ants.aasma@cybertec.at> wrote:
But given that we know the data length and we have it in a register
already, it's easy enough to just mask out data past the end with a
shift. See patch 1. Performance benefit is about 1.5x. Measured on a
small test harness that just hashes and finalizes an array of strings,
with a data dependency between consecutive hashes (next address
depends on the previous hash output).
I pushed this with a couple cosmetic adjustments, after fixing the
endianness issue. I'm not sure why valgrind is fine with this way, and
the other ways I tried forming the (little-endian) mask raised errors.
In addition to "zero_byte_low | (zero_byte_low - 1)", I tried
"~zero_byte_low & (zero_byte_low - 1)" and "zero_byte_low ^
(zero_byte_low - 1)" to no avail.
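For the record, all three expressions differ only in mask bits that fall in
zero input bytes, so each of them would mask the chunk correctly; a
standalone sketch with an illustrative value makes the differences visible:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* Illustrative haszero64 output: the true NUL flag in byte 3, plus a
     * spurious flag in byte 7 from a zero byte past the terminator. */
    uint64_t    z = 0x8000000080000000ULL;

    /* All three keep bits 0..30 (the real data before the NUL) and differ
     * only in bits belonging to zero input bytes. */
    printf("z | (z-1)  = %016llx\n", (unsigned long long) (z | (z - 1)));
    printf("~z & (z-1) = %016llx\n", (unsigned long long) (~z & (z - 1)));
    printf("z ^ (z-1)  = %016llx\n", (unsigned long long) (z ^ (z - 1)));
    return 0;
}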
On Thu, Mar 28, 2024 at 12:37 PM Jeff Davis <pgsql@j-davis.com> wrote:
0001 looks good to me, thank you.
v21-0003 adds a new file hashfn_unstable.c for convenience functions
and converts all the duplicate frontend uses of hash_string_pointer.
Why not make hash_string() inline, too? I'm fine with it either way,
I'm just curious why you went to the trouble to create a new .c file so
it didn't have to be inlined.
Thanks for looking! I pushed these, with hash_string() inlined.
I've attached (not reindented for clarity) an update of something
mentioned a few times already -- removing strlen calls for dynahash
and dshash string keys. I'm not quite sure how to update the comments
saying that calling string_hash directly is deprecated. This
patch goes further and semi-deprecates calling it at all, so these
comments seem a bit awkward now.
Attachments:
v21-0001-Use-fasthash-for-string-keys-in-dynahash-and-dsh.patchtext/x-patch; charset=US-ASCII; name=v21-0001-Use-fasthash-for-string-keys-in-dynahash-and-dsh.patchDownload
From 2e41e683b2fe2bc76b808e58ca2fea9067bff4e1 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Fri, 5 Apr 2024 13:59:13 +0700
Subject: [PATCH v21] Use fasthash for string keys in dynahash and dshash
This avoids strlen calls. string_hash is kept around in case
extensions are using it.
---
src/backend/lib/dshash.c | 5 +++--
src/backend/utils/hash/dynahash.c | 25 ++++++++++++++++-----
src/common/hashfn.c | 4 +++-
src/include/common/hashfn_unstable.h | 33 ++++++++++++++++++++++++++++
src/include/lib/dshash.h | 2 +-
5 files changed, 59 insertions(+), 10 deletions(-)
diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c
index 93a9e21ddd..8bebf92cb8 100644
--- a/src/backend/lib/dshash.c
+++ b/src/backend/lib/dshash.c
@@ -32,6 +32,7 @@
#include "postgres.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "lib/dshash.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
@@ -605,14 +606,14 @@ dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
}
/*
- * A hash function that forwards to string_hash.
+ * A hash function that forwards to hash_string_with_len.
*/
dshash_hash
dshash_strhash(const void *v, size_t size, void *arg)
{
Assert(strlen((const char *) v) < size);
- return string_hash((const char *) v, size);
+ return hash_string_with_len((const char *) v, size - 1);
}
/*
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 145e058fe6..9b85af2743 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
#include "access/xact.h"
#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
@@ -309,6 +310,18 @@ string_compare(const char *key1, const char *key2, Size keysize)
return strncmp(key1, key2, keysize - 1);
}
+/*
+ * hash function used when HASH_STRINGS is set
+ *
+ * If the string exceeds keysize-1 bytes, we want to hash only that many,
+ * because when it is copied into the hash table it will be truncated at
+ * that length.
+ */
+static uint32
+default_string_hash(const void *key, Size keysize)
+{
+ return hash_string_with_len((const char *) key, keysize - 1);
+}
/************************** CREATE ROUTINES **********************/
@@ -420,8 +433,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
else
{
/*
- * string_hash used to be considered the default hash method, and in a
- * non-assert build it effectively still is. But we now consider it
+ * string_hash used to be considered the default hash method, and
+ * it effectively still was until version 17. Since version 14 we consider it
* an assertion error to not say HASH_STRINGS explicitly. To help
* catch mistaken usage of HASH_STRINGS, we also insist on a
* reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -430,12 +443,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Assert(flags & HASH_STRINGS);
Assert(info->keysize > 8);
- hashp->hash = string_hash;
+ hashp->hash = default_string_hash;
}
/*
* If you don't specify a match function, it defaults to string_compare if
- * you used string_hash, and to memcmp otherwise.
+ * you used default_string_hash, and to memcmp otherwise.
*
* Note: explicitly specifying string_hash is deprecated, because this
* might not work for callers in loadable modules on some platforms due to
@@ -444,7 +457,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_COMPARE)
hashp->match = info->match;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
hashp->match = (HashCompareFunc) string_compare;
else
hashp->match = memcmp;
@@ -454,7 +467,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
*/
if (flags & HASH_KEYCOPY)
hashp->keycopy = info->keycopy;
- else if (hashp->hash == string_hash)
+ else if (hashp->hash == default_string_hash)
{
/*
* The signature of keycopy is meant for memcpy(), which returns
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 4db468cf85..5cfa4815a1 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -654,7 +654,9 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed)
/*
* string_hash: hash function for keys that are NUL-terminated strings.
*
- * NOTE: this is the default hash function if none is specified.
+ * NOTE: this was the default string hash for dynahash until version 17,
+ * and is now here only for backward compatibility. It's more efficient
+ * to use hash_string_with_len instead.
*/
uint32
string_hash(const void *key, Size keysize)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 7b647470ab..38abf690a0 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -450,4 +450,37 @@ hash_string(const char *s)
return fasthash_final32(&hs, s_len);
}
+/*
+ * hash_string_with_len: Like hash_string but only hashes
+ * 'len' bytes. Callers will likely want 'len' to be
+ * one less than the space they have in mind, to leave
+ * room for the NUL terminator.
+ */
+static inline uint32
+hash_string_with_len(const char *s, Size len)
+{
+ fasthash_state hs;
+ size_t s_len = 0;
+
+ fasthash_init(&hs, 0);
+
+ while (*s && s_len < len)
+ {
+ int chunk_len = 0;
+
+ while (s[chunk_len] != '\0' &&
+ s_len < len &&
+ chunk_len < FH_SIZEOF_ACCUM)
+ {
+ chunk_len++;
+ s_len++;
+ }
+
+ fasthash_accum(&hs, s, chunk_len);
+ s += chunk_len;
+ }
+
+ return fasthash_final32(&hs, s_len);
+}
+
#endif /* HASHFN_UNSTABLE_H */
diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h
index 7dda269b75..b72903c02e 100644
--- a/src/include/lib/dshash.h
+++ b/src/include/lib/dshash.h
@@ -117,7 +117,7 @@ extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg);
extern void dshash_memcpy(void *dest, const void *src, size_t size, void *arg);
/*
- * Convenience hash, compare, and copy functions wrapping strcmp, string_hash,
+ * Convenience hash, compare, and copy functions wrapping strcmp, hash_string_with_len,
* and strcpy.
*/
extern int dshash_strcmp(const void *a, const void *b, size_t size, void *arg);
--
2.44.0
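As a quick sanity check of the keysize-1 semantics (a sketch assuming the
v21 patch above is applied and this is compiled as frontend code inside a
PostgreSQL tree, so the header resolves): two keys that agree on the first
keysize-1 bytes must hash identically, since dynahash truncates the stored
key to that length:

#include "c.h"
#include "common/hashfn_unstable.h"

#include <stdio.h>

int
main(void)
{
    Size        keysize = 4;    /* room for 3 chars plus the NUL */
    uint32      h1 = hash_string_with_len("abcdef", keysize - 1);
    uint32      h2 = hash_string_with_len("abcxyz", keysize - 1);

    /* only "abc" participates in each hash, so this prints "equal" */
    printf("%s\n", h1 == h2 ? "equal" : "different");
    return 0;
}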
Hi!
Found that https://github.com/postgres/postgres/commit/0aba2554409ee3251d7558567edd114d8ed36dcc
produces a valgrind error in initdb.
Steps to reproduce:
CPPFLAGS="-DUSE_VALGRIND -Og" ./configure --enable-debug --enable-tap-tests --enable-cassert --with-icu
make ...
valgrind --quiet --exit-on-first-error=yes --error-exitcode=1 --leak-check=no --time-stamp=yes \
--gen-suppressions=all --trace-children=yes <path-to>/initdb -k -D <path-to>/data
give an error:
running bootstrap script ... ok
performing post-bootstrap initialization ... ==00:00:00:01.856 967784== Conditional jump or move depends on uninitialised value(s)
==00:00:00:01.856 967784== at 0x2F41F4: fasthash_accum (hashfn_unstable.h:136)
==00:00:00:01.856 967784== by 0x2F41F4: fasthash_accum_cstring_aligned (hashfn_unstable.h:247)
==00:00:00:01.856 967784== by 0x2F41F4: fasthash_accum_cstring (hashfn_unstable.h:271)
==00:00:00:01.856 967784== by 0x2F41F4: spcachekey_hash (namespace.c:268)
==00:00:00:01.856 967784== by 0x2F479F: nsphash_lookup (simplehash.h:836)
==00:00:00:01.856 967784== by 0x2F479F: spcache_insert (namespace.c:379)
==00:00:00:01.856 967784== by 0x2F533C: cachedNamespacePath (namespace.c:4236)
==00:00:00:01.856 967784== by 0x2F5425: recomputeNamespacePath (namespace.c:4294)
==00:00:00:01.856 967784== by 0x2F5516: RelnameGetRelid (namespace.c:875)
==00:00:00:01.856 967784== by 0x2F6CD5: RangeVarGetRelidExtended (namespace.c:524)
==00:00:00:01.856 967784== by 0x2DD1C7: objectNamesToOids (aclchk.c:701)
==00:00:00:01.856 967784== by 0x2E2A9D: ExecuteGrantStmt (aclchk.c:441)
==00:00:00:01.856 967784== by 0x61FF62: ProcessUtilitySlow (utility.c:1816)
==00:00:00:01.856 967784== by 0x61E948: standard_ProcessUtility (utility.c:973)
==00:00:00:01.856 967784== by 0x61EC1A: ProcessUtility (utility.c:530)
==00:00:00:01.856 967784== by 0x61C059: PortalRunUtility (pquery.c:1158)
==00:00:00:01.856 967784==
{
<insert_a_suppression_name_here>
Memcheck:Cond
fun:fasthash_accum
fun:fasthash_accum_cstring_aligned
fun:fasthash_accum_cstring
fun:spcachekey_hash
fun:nsphash_lookup
fun:spcache_insert
fun:cachedNamespacePath
fun:recomputeNamespacePath
fun:RelnameGetRelid
fun:RangeVarGetRelidExtended
fun:objectNamesToOids
fun:ExecuteGrantStmt
fun:ProcessUtilitySlow
fun:standard_ProcessUtility
fun:ProcessUtility
fun:PortalRunUtility
}
==00:00:00:01.856 967784==
==00:00:00:01.856 967784== Exit program on first error (--exit-on-first-error=yes)
child process exited with exit code 1
The current master at b7493e1 also has this error.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Thu, Dec 19, 2024 at 7:10 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Found that https://github.com/postgres/postgres/commit/0aba2554409ee3251d7558567edd114d8ed36dcc
produces a valgrind error in initdb.
What architecture and valgrind version is this? We've been bitten
before by different results on Arm vs x86.
The offending code is not even my preferred way to handle the last
word of the string (see f4ad0021af), so if the current way is still
not valgrind-clean, I wonder if we should give up and add an
exception, since we know any garbage bits are masked off.
--
John Naylor
Amazon Web Services
I wrote:
The offending code is not even my preferred way to handle the last
word of the string (see f4ad0021af), so if the current way is still
not valgrind-clean, I wonder if we should give up and add an
exception, since we know any garbage bits are masked off.
That would actually be a maintenance headache because the function is
inlined, but here's a better idea: We already have a fallback path for
when the string is not suitably aligned, or in 32-bit builds. We could
just use that under Valgrind:
static inline size_t
fasthash_accum_cstring(fasthash_state *hs, const char *str)
{
-#if SIZEOF_VOID_P >= 8
+#if SIZEOF_VOID_P >= 8 && !defined(USE_VALGRIND)
Any objections?
--
John Naylor
Amazon Web Services
Hi!
On 19.12.2024 06:21, John Naylor wrote:
On Thu, Dec 19, 2024 at 7:10 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Found that https://github.com/postgres/postgres/commit/0aba2554409ee3251d7558567edd114d8ed36dcc
produces a valgrind error in initdb.
What architecture and valgrind version is this? We've been bitten
before by different results on Arm vs x86.
It was an x86 AMD laptop: HP Probook 455 g7 with AMD Ryzen 7 4700U and 64GB DDR4 RAM.
OS: Linux 5.15.0-130-generic #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux.
~$ valgrind --version
valgrind-3.15.0
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Hi, John!
On 19.12.2024 12:48, John Naylor wrote:
That would actually be a maintenance headache because the function is
inlined, but here's a better idea: We already have a fallback path for
when the string is not suitably aligned, or in 32-bit builds. We could
just use that under Valgrind:

static inline size_t
fasthash_accum_cstring(fasthash_state *hs, const char *str)
{
-#if SIZEOF_VOID_P >= 8
+#if SIZEOF_VOID_P >= 8 && !defined(USE_VALGRIND)

Any objections?
This variant doesn't produce error and helped me to move
further beyond initdb in the some tests under valgrind
and fix a number of bugs.
Thank you very much!
Seems it is possible to exclude much less code from checking
under valgrind and get the same result by replacing the only
function call pg_rightmost_one_pos64() with a valgrind-safe
code. See the attached patch, please.
The pg_rightmost_one_pos64() itself can also be valgrind-safe
in some cases when the last version of its code works.
But i'm not sure if it's worth writing extra preprocessor instructions
to make this small piece of code also checkable under valgrind.
So in the patch i made a simple variant without it.
With the best wishes,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
v1-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchtext/x-patch; charset=UTF-8; name=v1-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchDownload
From 7af05703e59fb8f1e51ec1d667ddc150c40756f0 Mon Sep 17 00:00:00 2001
From: "Anton A. Melnikov" <a.melnikov@postgrespro.ru>
Date: Thu, 16 Jan 2025 02:56:52 +0300
Subject: [PATCH] Add valgrind-safe code to find a number of rightmost zero
bytes.
---
src/include/common/hashfn_unstable.h | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a09c0b7d5ce..7674c39b1ca 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -302,13 +302,22 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
size_t remainder;
uint64 mask;
+#if !defined(USE_VALGRIND)
/*
* The byte corresponding to the NUL will be 0x80, so the rightmost
* bit position will be in the range 15, 23, ..., 63. Turn this into
* byte position by dividing by 8.
*/
remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
-
+#else
+ /* valgrind-safe variant */
+ remainder = 0;
+ while ((zero_byte_low & 0xFF) == 0)
+ {
+ zero_byte_low >>= 8;
+ ++remainder;
+ }
+#endif
/*
* Create a mask for the remaining bytes so we can combine them into
* the hash. This must have the same result as mixing the remaining
--
2.48.1
"Anton A. Melnikov" <a.melnikov@postgrespro.ru> writes:
Seems it is possible to exclude much less code from checking
under valgrind and get the same result by replacing the only
function call pg_rightmost_one_pos64() with a valgrind-safe
code. See the attached patch, please.
There is no place anywhere in our code base where we hide unsafe
code from valgrind rather than fixing said code. This does not
seem like a place to start such an ugly practice. Performance
does not trump everything else.
I'd be inclined to just remove the pg_rightmost_one_pos64 call
in favor of the other coding you suggest.
regards, tom lane
Hi!
On 16.01.2025 04:36, Tom Lane wrote:
"Anton A. Melnikov" <a.melnikov@postgrespro.ru> writes:
Seems it is possible to exclude much less code from checking
under valgrind and get the same result by replacing the only
function call pg_rightmost_one_pos64() with a valgrind-safe
code. See the attached patch, please.There is no place anywhere in our code base where we hide unsafe
code from valgrind rather than fixing said code. This does not
seem like a place to start such an ugly practice. Performance
does not trump everything else.
Thanks for remark. Agreed.
I'd be inclined to just remove the pg_rightmost_one_pos64 call
in favor of the other coding you suggest.
Here is a patch like that.
With the best wishes,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Attachments:
v2-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchtext/x-patch; charset=UTF-8; name=v2-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchDownload
From bac26e0c7ee900246d454dd98de45b3ecefc15ec Mon Sep 17 00:00:00 2001
From: "Anton A. Melnikov" <a.melnikov@postgrespro.ru>
Date: Thu, 16 Jan 2025 02:56:52 +0300
Subject: [PATCH] Use valgrind-safe code to find a number of rightmost zero
bytes.
---
src/include/common/hashfn_unstable.h | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index a09c0b7d5ce..fa9b954c6b9 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -299,15 +299,21 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
if (firstbyte64(chunk) != 0)
{
- size_t remainder;
+ size_t remainder = 0;
uint64 mask;
/*
- * The byte corresponding to the NUL will be 0x80, so the rightmost
- * bit position will be in the range 15, 23, ..., 63. Turn this into
- * byte position by dividing by 8.
+ * The byte corresponding to the NUL terminator will be the rightmost 0x80.
+ * All zero bytes to the right of it correspond to the tail of the string.
+ * It remains to count them.
*/
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+ while ((zero_byte_low & 0xFF) == 0)
+ {
+ zero_byte_low >>= 8;
+ ++remainder;
+ }
+
+ Assert(remainder != 0);
/*
* Create a mask for the remaining bytes so we can combine them into
--
2.48.1
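The loop in this patch computes the same byte count as the bitscan it
replaces; a standalone sketch (illustrative value, assuming GCC or Clang
for __builtin_ctzll) compares the two:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    /* zero_byte_low as left by the haszero64 trick: 0x80 marks the NUL,
     * here in byte 4, so both methods should report 4. */
    uint64_t    zero_byte_low = UINT64_C(0x80) << (4 * 8);
    uint64_t    v = zero_byte_low;
    size_t      by_bitscan = __builtin_ctzll(zero_byte_low) / 8;
    size_t      by_loop = 0;

    while ((v & 0xFF) == 0)
    {
        v >>= 8;
        ++by_loop;
    }
    printf("bitscan: %zu, loop: %zu\n", by_bitscan, by_loop);
    return 0;
}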
On Wed, Jan 15, 2025 at 7:08 PM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
It was an x86 AMD laptop: HP Probook 455 g7 with AMD Ryzen 7 4700U and 64GB DDR4 RAM.
OS: Linux 5.15.0-130-generic #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux.
~$ valgrind --version
valgrind-3.15.0
Thanks for getting back to us! 3.24 on x86_64 doesn't raise a warning.
If anyone believes the difference from a release six years ago
represents a regression in diagnostic ability, perhaps that warrants a
bug report to Valgrind?
I'd be inclined to just remove the pg_rightmost_one_pos64 call
in favor of the other coding you suggest.Here is a patch like that.
It would be a lot more readable to revert the offending commit
instead, since its predecessor had a much simpler bytewise loop.
--
John Naylor
Amazon Web Services
On Fri, Jan 17, 2025 at 4:50 PM John Naylor <johncnaylorls@gmail.com> wrote:
It would be a lot more readable to revert the offending commit
instead, since its predecessor had a much simpler bytewise loop.
This will require a backpatch to v17. I'll take care of that soon.
--
John Naylor
Amazon Web Services
Hi!
On 22.01.2025 11:37, John Naylor wrote:
On Fri, Jan 17, 2025 at 4:50 PM John Naylor <johncnaylorls@gmail.com> wrote:
It would be a lot more readable to revert the offending commit
instead, since its predecessor had a much simpler bytewise loop.
Agreed that reverting seems the preferable way, and here's why.
I found that this valgrind error during initdb first appeared
after 0aba25544. At the earlier commit e97b672c88, where there is
no error, I did a small experiment on my laptop.
With -O2 compilation, the inlined spcachekey_hash() executed from
src/backend/catalog/namespace.c:369 takes 123 asm instructions
when hashing the string "pg_catalog".
In master at 630f9a43, spcachekey_hash() is not inlined, and the
asm call <spcachekey_hash> executes 204 asm instructions under the
same conditions. HAVE__BUILTIN_CTZ is defined on my PC, so finding
the rightmost non-zero bit takes only one asm instruction.
With the patch v2-0001-Add-valgrind-safe-code, the same takes 216
asm instructions.
Of course, if the typical length of a hashed string were known,
experiments could be performed that correspond better to reality.
But it seems to me there shouldn't be any considerably long strings
here, so the general trend is clear.
Please correct me if I'm wrong.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Thu, Jan 23, 2025 at 8:52 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Hi!
On 22.01.2025 11:37, John Naylor wrote:
On Fri, Jan 17, 2025 at 4:50 PM John Naylor <johncnaylorls@gmail.com> wrote:
It would be a lot more readable to revert the offending commit
instead, since its predecessor had a much simpler bytewise loop.
Agreed that reverting seems the preferable way, and here's why.
This is done -- thanks for the report, and for testing.
--
John Naylor
Amazon Web Services
Hi!
On 29.01.2025 10:02, John Naylor wrote:
This is done -- thanks for the report, and for testing.
It's good that this is done! But I still see the problem.
At ecb8226a in master, with the same configure as in [1]
(with asserts) valgrind gives:
==00:00:00:23.937 285792== Conditional jump or move depends on uninitialised value(s)
==00:00:00:23.937 285792== at 0x3084A3: fasthash_accum (hashfn_unstable.h:142)
==00:00:00:23.937 285792== by 0x3084A3: fasthash_accum_cstring_aligned (hashfn_unstable.h:299)
==00:00:00:23.937 285792== by 0x3084A3: fasthash_accum_cstring (hashfn_unstable.h:323)
==00:00:00:23.937 285792== by 0x3084A3: spcachekey_hash (namespace.c:268)
==00:00:00:23.937 285792== by 0x308A3C: nsphash_lookup (simplehash.h:836)
==00:00:00:23.937 285792== by 0x308A3C: spcache_insert (namespace.c:394)
==00:00:00:23.937 285792== by 0x3095D9: cachedNamespacePath (namespace.c:4251)
==00:00:00:23.937 285792== by 0x3096C2: recomputeNamespacePath (namespace.c:4309)
==00:00:00:23.937 285792== by 0x3097B3: RelnameGetRelid (namespace.c:890)
==00:00:00:23.937 285792== by 0x30AF72: RangeVarGetRelidExtended (namespace.c:539)
==00:00:00:23.937 285792== by 0x2F0738: objectNamesToOids (aclchk.c:701)
==00:00:00:23.937 285792== by 0x2F5EFF: ExecuteGrantStmt (aclchk.c:425)
==00:00:00:23.937 285792== by 0x65E2E7: ProcessUtilitySlow (utility.c:1812)
==00:00:00:23.937 285792== by 0x65CCB2: standard_ProcessUtility (utility.c:969)
==00:00:00:23.937 285792== by 0x65CF84: ProcessUtility (utility.c:523)
==00:00:00:23.937 285792== by 0x65A426: PortalRunUtility (pquery.c:1152)
Please see backtrace at bt-with-asserts-Og.txt attached.
Without asserts it fails in a similar way:
==00:00:00:23.391 271086== Conditional jump or move depends on uninitialised value(s)
==00:00:00:23.391 271086== at 0x2BE8FE: fasthash_accum (hashfn_unstable.h:180)
==00:00:00:23.391 271086== by 0x2BE8FE: fasthash_accum_cstring_aligned (hashfn_unstable.h:299)
==00:00:00:23.391 271086== by 0x2BE8FE: fasthash_accum_cstring (hashfn_unstable.h:323)
==00:00:00:23.391 271086== by 0x2BE8FE: spcachekey_hash (namespace.c:268)
See bt-wo-asserts-Og.txt
In addition the -O0 build with asserts gives an error in the
pg_rightmost_one_pos64(), not in the fasthash_accum():
==00:00:00:16.360 100422== at 0x3424D6: pg_rightmost_one_pos64 (pg_bitutils.h:148)
==00:00:00:16.360 100422== by 0x342909: fasthash_accum_cstring_aligned (hashfn_unstable.h:298)
==00:00:00:16.360 100422== by 0x3429AB: fasthash_accum_cstring (hashfn_unstable.h:323)
==00:00:00:16.360 100422== by 0x342AFC: spcachekey_hash (namespace.c:268)
==00:00:00:16.360 100422== by 0x3437C2: nsphash_lookup (simplehash.h:836)
See bt-with-asserts-O0.txt, please. That one is clear, since "word" contains 5 undefined bytes.
(Maybe the compiler swallowed this line in the -Og build.)
The previous two cases are not so clear, because valgrind decided that
"remainder" in fasthash_accum_cstring_aligned() was undefined.
It assumes that if the argument of __builtin_ctzl() contains some
undefined bytes, then the result is undefined, even though the rightmost
bits and the first non-zero bit are located in the defined bytes.
On the other hand, the presence of the line "Assert(word != 0);"
in pg_rightmost_one_pos64() already appears to be a
valid reason to use a valgrind-safe solution that does
not allow any undefined bits in its argument.
Besides, I found that in the case described above it takes 214
asm instructions to perform "call 0xXXX <spcachekey_hash>" with the
existing implementation in master, while the patch from [2]
was a bit faster, at 211 instructions.
So I rebased it on the current master and kindly ask that it be
taken into account as well.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
[1]: /messages/by-id/a3a959f6-14b8-4819-ac04-eaf2aa2e868d@postgrespro.ru
[2]: /messages/by-id/0647027b-9c9a-4f16-8f7c-3f9f3eb9451e@postgrespro.ru
Attachments:
v3-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchtext/x-patch; charset=UTF-8; name=v3-0001-Add-valgrind-safe-code-to-find-rightmost-bytes.patchDownload
From a04e825e776ebb5684a94a310b1dc2c54e515d0c Mon Sep 17 00:00:00 2001
From: "Anton A. Melnikov" <a.melnikov@postgrespro.ru>
Date: Wed, 12 Feb 2025 20:22:45 +0300
Subject: [PATCH] Use valgrind-safe code to find a number of rightmost zero
bytes.
---
src/include/common/hashfn_unstable.h | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index e07c0226c1f..82158f0d9c3 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -255,7 +255,7 @@ static inline size_t
fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
{
const char *const start = str;
- size_t remainder;
+ size_t remainder = 0;
uint64 zero_byte_low;
Assert(PointerIsAligned(start, uint64));
@@ -291,11 +291,18 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
}
/*
- * The byte corresponding to the NUL will be 0x80, so the rightmost bit
- * position will be in the range 7, 15, ..., 63. Turn this into byte
- * position by dividing by 8.
+ * The byte corresponding to the NUL terminator will be the rightmost 0x80.
+ * All zero bytes to the right of it correspond to the tail of the string.
+ * It remains to count them.
*/
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+ while ((zero_byte_low & 0xFF) == 0)
+ {
+ zero_byte_low >>= 8;
+ ++remainder;
+ }
+
+ Assert(remainder != 0);
+
fasthash_accum(hs, str, remainder);
str += remainder;
--
2.48.1
On Thu, Feb 13, 2025 at 3:42 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Hi!
On 29.01.2025 10:02, John Naylor wrote:
This is done -- thanks for the report, and for testing.
It's good that this is done! But I still see the problem.
Hi, my understanding was you previously tested with the revert. Did
you not actually test, or are you building differently for these
cases?
--
John Naylor
Amazon Web Services
Hi, John!
On 13.02.2025 04:49, John Naylor wrote:
On Thu, Feb 13, 2025 at 3:42 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
On 29.01.2025 10:02, John Naylor wrote:
This is done -- thanks for the report, and for testing.
It's good that this is done! But I still see the problem.
Hi, my understanding was you previously tested with the revert. Did
you not actually test, or are you building differently for these
cases?
My first test [1] was made at b7493e1,
while the second [2] was at ecb8226a, after the revert in 235328ee.
The build process was the same in both cases.
Both [1] and [2] contain the pg_rightmost_one_pos64() call
that leads to a valgrind error.
Also, I did this test much further back, at e97b672c88 [3],
and found no errors.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
[1]: /messages/by-id/a3a959f6-14b8-4819-ac04-eaf2aa2e868d@postgrespro.ru
[2]: /messages/by-id/f3aa2d45-3b28-41c5-9499-a1bc30e0f8ec@postgrespro.ru
[3]: /messages/by-id/4c739718-27d6-44fe-9113-56a251c13275@postgrespro.ru
On Thu, Feb 13, 2025 at 9:58 AM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Hi, my understanding was you previously tested with the revert. Did
you not actually test, or are you building differently for these
cases?
My first test [1] was made at b7493e1,
while the second [2] was at ecb8226a, after the revert in 235328ee.
Three weeks ago, you said "Agreed that reverting seems the preferable
way, and here's why." I assumed that meant you tested it, so my
mistake. I'll take a look.
--
John Naylor
Amazon Web Services
Hi!
On 13.02.2025 09:03, John Naylor wrote:
Three weeks ago, you said "Agreed that reverting seems as a preferable
way, and here's why." I assumed that meant you tested it, so my
mistake. I'll take a look.
Sorry! I should have expressed my thoughts more clearly here.
I meant a revert in the general sense, perhaps to before the
optimization was started at all, since that gave the fastest result
on short strings. Although it is unknown how that would turn out in
reality, and it could be worse than the optimization with some fixes.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
Hi Anton, could you please test if the attached passes for you? This
seems the simplest way.
--
John Naylor
Amazon Web Services
Attachments:
valgrind-fix-take-2.patchapplication/x-patch; name=valgrind-fix-take-2.patchDownload
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index e07c0226c1..bb09f87abe 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -290,13 +290,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
str += FH_SIZEOF_ACCUM;
}
- /*
- * The byte corresponding to the NUL will be 0x80, so the rightmost bit
- * position will be in the range 7, 15, ..., 63. Turn this into byte
- * position by dividing by 8.
- */
- remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
- fasthash_accum(hs, str, remainder);
+ remainder = fasthash_accum_cstring_unaligned(hs, str);
str += remainder;
return str - start;
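For reference, the bytewise fallback being reused here looks roughly like
the following (paraphrased from hashfn_unstable.h as of this thread; see
the tree for the authoritative version). It never loads bytes past the
terminator, which is why it is valgrind-clean:

/* requires common/hashfn_unstable.h for fasthash_state, fasthash_accum,
 * and FH_SIZEOF_ACCUM; sketch only, not the committed code verbatim */
static inline size_t
fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
{
    const char *const start = str;

    while (*str)
    {
        size_t      chunk_len = 0;

        /* accumulate up to FH_SIZEOF_ACCUM bytes, stopping at the NUL */
        while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
            chunk_len++;

        fasthash_accum(hs, str, chunk_len);
        str += chunk_len;
    }

    return str - start;
}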
Hi, John!
On 14.02.2025 08:17, John Naylor wrote:
Hi Anton, could you please test if the attached passes for you? This
seems the simplest way.
Yes, of course. I tested this patch on the current master at 9e17ac997
in the same way and found no valgrind errors.
Moreover, with an -O2 build on my PC, nsphash_lookup() has
become fully inlined with this patch and is almost two times
faster: 112 asm instructions under the same conditions.
Thanks!
With the best wishes,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Fri, Feb 14, 2025 at 6:40 PM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Yes, of course. I tested this patch on the current master at 9e17ac997
in the same way and found no valgrind errors.
Thanks, I'll push next week after the next minor release.
PS: I now realize the source of the confusion: In the time after your
initial report, I misremembered what the bad commit was. Sorry about
that!
--
John Naylor
Amazon Web Services
On Sat, Feb 15, 2025 at 12:28 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Fri, Feb 14, 2025 at 6:40 PM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Yes, of course. I tested this patch on the current master at 9e17ac997
in the same way and found no valgrind errors.
Thanks, I'll push next week after the next minor release.
Committed.
--
John Naylor
Amazon Web Services
On 25.02.2025 06:55, John Naylor wrote:
On Sat, Feb 15, 2025 at 12:28 PM John Naylor <johncnaylorls@gmail.com> wrote:
On Fri, Feb 14, 2025 at 6:40 PM Anton A. Melnikov
<a.melnikov@postgrespro.ru> wrote:
Yes, of course. I tested this patch on the current master at 9e17ac997
in the same way and found no valgrind errors.
Thanks, I'll push next week after the next minor release.
Committed.
Thanks!
With the best wishes,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company