From 7fbab106f422a9da7658c9a1580d2b4e7229731a Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 2 Feb 2026 19:51:46 +0700 Subject: [PATCH v7 5/5] Bypass function call on x86 This allows a single bitmap word to be handled inline without a lookup table. Also, make 32-bit inline function similar to 64-bit, looks cleaner this way and is not performance critical. --- src/backend/nodes/bitmapset.c | 4 ++++ src/include/port/pg_bitutils.h | 36 +++++++++++++--------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 23c91fdb6c9..c274da98798 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -748,6 +748,10 @@ bms_num_members(const Bitmapset *a) if (a == NULL) return 0; + /* fastpath for the common case */ + if (a->nwords == 1) + return bmw_popcount(a->words[0]); + return pg_popcount((const char *) a->words, a->nwords * sizeof(bitmapword)); } diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index a3e0f346ef6..98ede3e2686 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -297,23 +297,15 @@ extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mas /* * pg_popcount32 * Return the number of 1 bits set in word + * + * from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel */ static inline int pg_popcount32(uint32 word) { -#ifdef HAVE__BUILTIN_POPCOUNT - return __builtin_popcount(word); -#else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; -#endif /* HAVE__BUILTIN_POPCOUNT */ + word = word - ((word >> 1) & 0x55555555); + word = (word & 0x33333333) + ((word >> 2) & 0x33333333); + return ((word + (word >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; } /* @@ -323,7 +315,11 @@ pg_popcount32(uint32 word) static inline int pg_popcount64(uint64 word) { -#ifdef HAVE__BUILTIN_POPCOUNT + /* + * On x86, gcc generates a function call for this builtin unless told + * otherwise, so we use the equivalent pure C here to allow inlining. + */ +#if defined(HAVE__BUILTIN_POPCOUNT) && (defined(__POPCNT__) || !defined(__x86_64__)) #if SIZEOF_LONG == 8 return __builtin_popcountl(word); #elif SIZEOF_LONG_LONG == 8 @@ -332,15 +328,11 @@ pg_popcount64(uint64 word) #error "cannot find integer of the same size as uint64_t" #endif #else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; + word = word - ((word >> 1) & 0x5555555555555555); + word = (word & 0x3333333333333333) + ((word >> 2) & 0x3333333333333333); + word = (word + (word >> 4)) & 0xF0F0F0F0F0F0F0F; + return (word * 0x101010101010101) >> 56; #endif /* HAVE__BUILTIN_POPCOUNT */ } -- 2.52.0