From d2081e48423d4c1703c30f9f375730de4a53cbbf Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Thu, 20 Dec 2018 17:46:35 +1300 Subject: [PATCH v1] Add basic support for using the POPCNT and SSE4.2s LZCNT opcodes These opcodes have been around in the AMD world since 2007, and 2008 in the case of intel. They're supported in GCC and Clang via some __builtin macros. The opcodes may be unavailable during runtime, in which case we fall back on a C-based implementation of the code. In order to get the POPCNT instruction we must pass the -mpopcnt option to the compiler, when supported. David Rowley and Thomas Munro --- config/c-compiler.m4 | 108 ++++++++ configure | 236 ++++++++++++++++++ configure.in | 9 + src/backend/access/heap/visibilitymap.c | 72 ++---- src/backend/nodes/bitmapset.c | 161 ++++-------- src/backend/utils/adt/Makefile | 2 +- src/backend/utils/adt/bitutils.c | 424 ++++++++++++++++++++++++++++++++ src/include/pg_config.h.in | 18 ++ src/include/pg_config.h.win32 | 18 ++ src/include/utils/bitutils.h | 52 ++++ 10 files changed, 937 insertions(+), 163 deletions(-) create mode 100644 src/backend/utils/adt/bitutils.c create mode 100644 src/include/utils/bitutils.h diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index af2dea1c2a..ac73416dd1 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -378,6 +378,114 @@ fi])# PGAC_C_BUILTIN_OP_OVERFLOW +# PGAC_C_BUILTIN_POPCOUNT +# ------------------------- +# Check if the C compiler understands __builtin_popcount(), +# and define HAVE__BUILTIN_POPCOUNT if so. +AC_DEFUN([PGAC_C_BUILTIN_POPCOUNT], +[AC_CACHE_CHECK(for __builtin_popcount, pgac_cv__builtin_popcount, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_popcount(255);] +)], +[pgac_cv__builtin_popcount=yes], +[pgac_cv__builtin_popcount=no])]) +if test x"$pgac_cv__builtin_popcount" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_POPCOUNT, 1, + [Define to 1 if your compiler understands __builtin_popcount.]) +fi])# PGAC_C_BUILTIN_POPCOUNT + + + +# PGAC_C_BUILTIN_POPCOUNTL +# ------------------------- +# Check if the C compiler understands __builtin_popcountl(), +# and define HAVE__BUILTIN_POPCOUNTL if so. +AC_DEFUN([PGAC_C_BUILTIN_POPCOUNTL], +[AC_CACHE_CHECK(for __builtin_popcountl, pgac_cv__builtin_popcountl, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_popcountl(255);] +)], +[pgac_cv__builtin_popcountl=yes], +[pgac_cv__builtin_popcountl=no])]) +if test x"$pgac_cv__builtin_popcountl" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_POPCOUNTL, 1, + [Define to 1 if your compiler understands __builtin_popcountl.]) +fi])# PGAC_C_BUILTIN_POPCOUNTL + + + +# PGAC_C_BUILTIN_CTZ +# ------------------------- +# Check if the C compiler understands __builtin_ctz(), +# and define HAVE__BUILTIN_CTZ if so. +AC_DEFUN([PGAC_C_BUILTIN_CTZ], +[AC_CACHE_CHECK(for __builtin_ctz, pgac_cv__builtin_ctz, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_ctz(256);] +)], +[pgac_cv__builtin_ctz=yes], +[pgac_cv__builtin_ctz=no])]) +if test x"$pgac_cv__builtin_ctz" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CTZ, 1, + [Define to 1 if your compiler understands __builtin_ctz.]) +fi])# PGAC_C_BUILTIN_CTZ + + + +# PGAC_C_BUILTIN_CTZL +# ------------------------- +# Check if the C compiler understands __builtin_ctzl(), +# and define HAVE__BUILTIN_CTZL if so. +AC_DEFUN([PGAC_C_BUILTIN_CTZL], +[AC_CACHE_CHECK(for __builtin_ctzl, pgac_cv__builtin_ctzl, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_ctzl(256);] +)], +[pgac_cv__builtin_ctzl=yes], +[pgac_cv__builtin_ctzl=no])]) +if test x"$pgac_cv__builtin_ctzl" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CTZL, 1, + [Define to 1 if your compiler understands __builtin_ctzl.]) +fi])# PGAC_C_BUILTIN_CTZL + + + +# PGAC_C_BUILTIN_CLZ +# ------------------------- +# Check if the C compiler understands __builtin_clz(), +# and define HAVE__BUILTIN_CLZ if so. +AC_DEFUN([PGAC_C_BUILTIN_CLZ], +[AC_CACHE_CHECK(for __builtin_clz, pgac_cv__builtin_clz, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_clz(256);] +)], +[pgac_cv__builtin_clz=yes], +[pgac_cv__builtin_clz=no])]) +if test x"$pgac_cv__builtin_clz" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CLZ, 1, + [Define to 1 if your compiler understands __builtin_clz.]) +fi])# PGAC_C_BUILTIN_CLZ + + + +# PGAC_C_BUILTIN_CLZL +# ------------------------- +# Check if the C compiler understands __builtin_clzl(), +# and define HAVE__BUILTIN_CLZL if so. +AC_DEFUN([PGAC_C_BUILTIN_CLZL], +[AC_CACHE_CHECK(for __builtin_clzl, pgac_cv__builtin_clzl, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static int x = __builtin_clzl(256);] +)], +[pgac_cv__builtin_clzl=yes], +[pgac_cv__builtin_clzl=no])]) +if test x"$pgac_cv__builtin_clzl" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CLZL, 1, + [Define to 1 if your compiler understands __builtin_clzl.]) +fi])# PGAC_C_BUILTIN_CLZL + + + # PGAC_C_BUILTIN_UNREACHABLE # -------------------------- # Check if the C compiler understands __builtin_unreachable(), diff --git a/configure b/configure index ea40f5f03d..3e400fe3e2 100755 --- a/configure +++ b/configure @@ -5916,6 +5916,98 @@ if test x"$pgac_cv_prog_CXX_cxxflags__fexcess_precision_standard" = x"yes"; then fi + # Enable use of the POPCNT instruction + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -mpopcnt, for CFLAGS" >&5 +$as_echo_n "checking whether ${CC} supports -mpopcnt, for CFLAGS... " >&6; } +if ${pgac_cv_prog_CC_cflags__mpopcnt+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +pgac_save_CC=$CC +CC=${CC} +CFLAGS="${CFLAGS} -mpopcnt" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_prog_CC_cflags__mpopcnt=yes +else + pgac_cv_prog_CC_cflags__mpopcnt=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +CC="$pgac_save_CC" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__mpopcnt" >&5 +$as_echo "$pgac_cv_prog_CC_cflags__mpopcnt" >&6; } +if test x"$pgac_cv_prog_CC_cflags__mpopcnt" = x"yes"; then + CFLAGS="${CFLAGS} -mpopcnt" +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -mpopcnt, for CXXFLAGS" >&5 +$as_echo_n "checking whether ${CXX} supports -mpopcnt, for CXXFLAGS... " >&6; } +if ${pgac_cv_prog_CXX_cxxflags__mpopcnt+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CXXFLAGS=$CXXFLAGS +pgac_save_CXX=$CXX +CXX=${CXX} +CXXFLAGS="${CXXFLAGS} -mpopcnt" +ac_save_cxx_werror_flag=$ac_cxx_werror_flag +ac_cxx_werror_flag=yes +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + pgac_cv_prog_CXX_cxxflags__mpopcnt=yes +else + pgac_cv_prog_CXX_cxxflags__mpopcnt=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_cxx_werror_flag=$ac_save_cxx_werror_flag +CXXFLAGS="$pgac_save_CXXFLAGS" +CXX="$pgac_save_CXX" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__mpopcnt" >&5 +$as_echo "$pgac_cv_prog_CXX_cxxflags__mpopcnt" >&6; } +if test x"$pgac_cv_prog_CXX_cxxflags__mpopcnt" = x"yes"; then + CXXFLAGS="${CXXFLAGS} -mpopcnt" +fi + + # Optimization flags for specific files that benefit from vectorization { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -funroll-loops, for CFLAGS_VECTOR" >&5 $as_echo_n "checking whether ${CC} supports -funroll-loops, for CFLAGS_VECTOR... " >&6; } @@ -14078,6 +14170,150 @@ if test x"$pgac_cv__builtin_constant_p" = xyes ; then $as_echo "#define HAVE__BUILTIN_CONSTANT_P 1" >>confdefs.h +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 +$as_echo_n "checking for __builtin_popcount... " >&6; } +if ${pgac_cv__builtin_popcount+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_popcount(255); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_popcount=yes +else + pgac_cv__builtin_popcount=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5 +$as_echo "$pgac_cv__builtin_popcount" >&6; } +if test x"$pgac_cv__builtin_popcount" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_POPCOUNT 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcountl" >&5 +$as_echo_n "checking for __builtin_popcountl... " >&6; } +if ${pgac_cv__builtin_popcountl+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_popcountl(255); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_popcountl=yes +else + pgac_cv__builtin_popcountl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcountl" >&5 +$as_echo "$pgac_cv__builtin_popcountl" >&6; } +if test x"$pgac_cv__builtin_popcountl" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_POPCOUNTL 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5 +$as_echo_n "checking for __builtin_ctz... " >&6; } +if ${pgac_cv__builtin_ctz+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_ctz(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_ctz=yes +else + pgac_cv__builtin_ctz=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctz" >&5 +$as_echo "$pgac_cv__builtin_ctz" >&6; } +if test x"$pgac_cv__builtin_ctz" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CTZ 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5 +$as_echo_n "checking for __builtin_ctzl... " >&6; } +if ${pgac_cv__builtin_ctzl+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_ctzl(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_ctzl=yes +else + pgac_cv__builtin_ctzl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_ctzl" >&5 +$as_echo "$pgac_cv__builtin_ctzl" >&6; } +if test x"$pgac_cv__builtin_ctzl" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CTZL 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5 +$as_echo_n "checking for __builtin_clz... " >&6; } +if ${pgac_cv__builtin_clz+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_clz(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_clz=yes +else + pgac_cv__builtin_clz=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5 +$as_echo "$pgac_cv__builtin_clz" >&6; } +if test x"$pgac_cv__builtin_clz" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CLZ 1" >>confdefs.h + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5 +$as_echo_n "checking for __builtin_clzl... " >&6; } +if ${pgac_cv__builtin_clzl+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static int x = __builtin_clzl(256); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_clzl=yes +else + pgac_cv__builtin_clzl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzl" >&5 +$as_echo "$pgac_cv__builtin_clzl" >&6; } +if test x"$pgac_cv__builtin_clzl" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CLZL 1" >>confdefs.h + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable" >&5 $as_echo_n "checking for __builtin_unreachable... " >&6; } diff --git a/configure.in b/configure.in index 89a0fb2470..6a78fef36a 100644 --- a/configure.in +++ b/configure.in @@ -504,6 +504,9 @@ if test "$GCC" = yes -a "$ICC" = no; then # Disable FP optimizations that cause various errors on gcc 4.5+ or maybe 4.6+ PGAC_PROG_CC_CFLAGS_OPT([-fexcess-precision=standard]) PGAC_PROG_CXX_CFLAGS_OPT([-fexcess-precision=standard]) + # Enable use of the POPCNT instruction + PGAC_PROG_CC_CFLAGS_OPT([-mpopcnt]) + PGAC_PROG_CXX_CFLAGS_OPT([-mpopcnt]) # Optimization flags for specific files that benefit from vectorization PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-funroll-loops]) PGAC_PROG_CC_VAR_OPT(CFLAGS_VECTOR, [-ftree-vectorize]) @@ -1488,6 +1491,12 @@ PGAC_C_BUILTIN_BSWAP16 PGAC_C_BUILTIN_BSWAP32 PGAC_C_BUILTIN_BSWAP64 PGAC_C_BUILTIN_CONSTANT_P +PGAC_C_BUILTIN_POPCOUNT +PGAC_C_BUILTIN_POPCOUNTL +PGAC_C_BUILTIN_CTZ +PGAC_C_BUILTIN_CTZL +PGAC_C_BUILTIN_CLZ +PGAC_C_BUILTIN_CLZL PGAC_C_BUILTIN_UNREACHABLE PGAC_C_COMPUTED_GOTO PGAC_STRUCT_TIMEZONE diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 695567b4b0..01969d399f 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -92,6 +92,7 @@ #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/smgr.h" +#include "utils/bitutils.h" #include "utils/inval.h" @@ -115,43 +116,9 @@ #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) -/* tables for fast counting of set bits for visible and frozen */ -static const uint8 number_of_ones_for_visible[256] = { - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4 -}; -static const uint8 number_of_ones_for_frozen[256] = { - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4 -}; +/* Masks for bit counting. */ +#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */ +#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */ /* prototypes for internal routines */ static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); @@ -408,18 +375,16 @@ void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen) { BlockNumber mapBlock; + BlockNumber nvisible = 0; + BlockNumber nfrozen = 0; /* all_visible must be specified */ Assert(all_visible); - *all_visible = 0; - if (all_frozen) - *all_frozen = 0; - for (mapBlock = 0;; mapBlock++) { Buffer mapBuffer; - unsigned char *map; + uint64 *map; int i; /* @@ -436,17 +401,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro * immediately stale anyway if anyone is concurrently setting or * clearing bits, and we only really need an approximate value. */ - map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer)); + map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer)); - for (i = 0; i < MAPSIZE; i++) + StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0, + "unsupported MAPSIZE"); + if (all_frozen == NULL) { - *all_visible += number_of_ones_for_visible[map[i]]; - if (all_frozen) - *all_frozen += number_of_ones_for_frozen[map[i]]; + for (i = 0; i < MAPSIZE / sizeof(uint64); i++) + nvisible += pg_popcount64(map[i] & VISIBLE_MASK64); + } + else + { + for (i = 0; i < MAPSIZE / sizeof(uint64); i++) + { + nvisible += pg_popcount64(map[i] & VISIBLE_MASK64); + nfrozen += pg_popcount64(map[i] & FROZEN_MASK64); + } } ReleaseBuffer(mapBuffer); } + + *all_visible = nvisible; + if (all_frozen) + *all_frozen = nfrozen; } /* diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 8ce253c88d..6ad2427af6 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -22,6 +22,7 @@ #include "access/hash.h" #include "nodes/pg_list.h" +#include "utils/bitutils.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) @@ -51,81 +52,6 @@ #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) - -/* - * Lookup tables to avoid need for bit-by-bit groveling - * - * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit - * in a nonzero byte value x. The entry for x=0 is never used. - * - * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a - * nonzero byte value x. The entry for x=0 is never used. - * - * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x. - * - * We could make these tables larger and reduce the number of iterations - * in the functions that use them, but bytewise shifts and masks are - * especially fast on many machines, so working a byte at a time seems best. - */ - -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -static const uint8 leftmost_one_pos[256] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - - /* * bms_copy - make a palloc'd copy of a bitmapset */ @@ -607,12 +533,13 @@ bms_singleton_member(const Bitmapset *a) if (result >= 0 || HAS_MULTIPLE_ONES(w)) elog(ERROR, "bitmapset has multiple members"); result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; +#if BITS_PER_BITMAPWORD == 32 + result += pg_rightmost_one32(w); +#elif BITS_PER_BITMAPWORD == 64 + result += pg_rightmost_one64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif } } if (result < 0) @@ -650,12 +577,13 @@ bms_get_singleton_member(const Bitmapset *a, int *member) if (result >= 0 || HAS_MULTIPLE_ONES(w)) return false; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; +#if BITS_PER_BITMAPWORD == 32 + result += pg_rightmost_one32(w); +#elif BITS_PER_BITMAPWORD == 64 + result += pg_rightmost_one64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif } } if (result < 0) @@ -681,12 +609,16 @@ bms_num_members(const Bitmapset *a) { bitmapword w = a->words[wordnum]; - /* we assume here that bitmapword is an unsigned type */ - while (w != 0) - { - result += number_of_ones[w & 255]; - w >>= 8; - } + /* No need to count the bits in a zero word */ + if (w == 0) + continue; +#if BITS_PER_BITMAPWORD == 32 + result += pg_popcount32(w); +#elif BITS_PER_BITMAPWORD == 64 + result += pg_popcount64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif } return result; } @@ -1041,12 +973,13 @@ bms_first_member(Bitmapset *a) a->words[wordnum] &= ~w; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; +#if BITS_PER_BITMAPWORD == 32 + result += pg_rightmost_one32(w); +#elif BITS_PER_BITMAPWORD == 64 + result += pg_rightmost_one64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif return result; } } @@ -1096,12 +1029,13 @@ bms_next_member(const Bitmapset *a, int prevbit) int result; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; +#if BITS_PER_BITMAPWORD == 32 + result += pg_rightmost_one32(w); +#elif BITS_PER_BITMAPWORD == 64 + result += pg_rightmost_one64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif return result; } @@ -1167,16 +1101,13 @@ bms_prev_member(const Bitmapset *a, int prevbit) if (w != 0) { - int result; - int shift = BITS_PER_BITMAPWORD - 8; - - result = wordnum * BITS_PER_BITMAPWORD; - - while ((w >> shift) == 0) - shift -= 8; - - result += shift + leftmost_one_pos[(w >> shift) & 255]; - return result; +#if BITS_PER_BITMAPWORD == 32 + return pg_leftmost_one32(w); +#elif BITS_PER_BITMAPWORD == 64 + return pg_leftmost_one64(w); +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif } /* in subsequent words, consider all bits */ diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 20eead1798..40e71c6b56 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -11,7 +11,7 @@ include $(top_builddir)/src/Makefile.global # keep this list arranged alphabetically or it gets to be a mess OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ array_typanalyze.o array_userfuncs.o arrayutils.o ascii.o \ - bool.o cash.o char.o cryptohashes.o \ + bitutils.o bool.o cash.o char.o cryptohashes.o \ date.o datetime.o datum.o dbsize.o domains.o \ encode.o enum.o expandeddatum.o expandedrecord.o \ float.o format_type.o formatting.o genfile.o \ diff --git a/src/backend/utils/adt/bitutils.c b/src/backend/utils/adt/bitutils.c new file mode 100644 index 0000000000..05b686cf33 --- /dev/null +++ b/src/backend/utils/adt/bitutils.c @@ -0,0 +1,424 @@ +/*------------------------------------------------------------------------- + * + * bitutils.c + * miscellaneous functions bit-wise operations. + * + * Portions Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/bitutils.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#ifdef HAVE__GET_CPUID +#include +#endif + +#ifdef HAVE__CPUID +#include +#endif + +#include "utils/bitutils.h" + +static const uint8 number_of_ones[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +#if defined(HAVE__BUILTIN_POPCOUNT) || defined(HAVE__BUILTIN_POPCOUNTL) + +static bool +pg_popcount_available(void) +{ + + unsigned int exx[4] = { 0, 0, 0, 0 }; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 23)) != 0; /* POPCNT */ +} +#endif + +#ifdef HAVE__BUILTIN_POPCOUNT + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_popcount32_choose(uint32 word) +{ + if (pg_popcount_available()) + pg_popcount32 = pg_popcount32_sse42; + else + pg_popcount32 = pg_popcount32_slow; + + return pg_popcount32(word); +} + +int +pg_popcount32_sse42(uint32 word) +{ + return __builtin_popcount(word); +} + + +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; + +#else + +int(*pg_popcount32) (uint32 word) = pg_popcount32_slow; + +#endif /* HAVE__BUILTIN_POPCOUNT */ + +/* + * pg_popcount32_slow + * Return the number of 1 bits set in word + */ +int +pg_popcount32_slow(uint32 word) +{ + int result = 0; + + while (word != 0) + { + result += number_of_ones[word & 255]; + word >>= 8; + } + + return result; +} + +#ifdef HAVE__BUILTIN_POPCOUNTL + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_popcount64_choose(uint64 word) +{ + if (pg_popcount_available()) + pg_popcount64 = pg_popcount64_sse42; + else + pg_popcount64 = pg_popcount64_slow; + + return pg_popcount64(word); +} + +int +pg_popcount64_sse42(uint64 word) +{ + return __builtin_popcountl(word); +} + +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; + +#else + +int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; + +#endif /* HAVE__BUILTIN_POPCOUNTL */ + +/* + * pg_popcount64_slow + * Return the number of 1 bits set in word + */ +int +pg_popcount64_slow(uint64 word) +{ + int result = 0; + + while (word != 0) + { + result += number_of_ones[word & 255]; + word >>= 8; + } + + return result; +} + +static const uint8 rightmost_one_pos[256] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +static const uint8 leftmost_one_pos[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + +#if defined(HAVE__BUILTIN_CTZ) || defined(HAVE__BUILTIN_CTZL) || defined(HAVE__BUILTIN_CLZ) || defined(HAVE__BUILTIN_CLZL) + +static bool +pg_lzcnt_available(void) +{ + + unsigned int exx[4] = { 0, 0, 0, 0 }; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(0x80000001, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 0x80000001); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 5)) != 0; /* LZCNT */ +} +#endif + +#ifdef HAVE__BUILTIN_CTZ + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_rightmost_one32_choose(uint32 word) +{ + if (pg_lzcnt_available()) + pg_rightmost_one32 = pg_rightmost_one32_abm; + else + pg_rightmost_one32 = pg_rightmost_one32_slow; + + return pg_rightmost_one32(word); +} + +int +pg_rightmost_one32_abm(uint32 word) +{ + return __builtin_ctz(word); +} + +int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_choose; + +#else + +int (*pg_rightmost_one32) (uint32 word) = pg_rightmost_one32_slow; + +#endif /* HAVE__BUILTIN_CTZ */ + +/* + * pg_rightmost_one32_slow + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +int +pg_rightmost_one32_slow(uint32 word) +{ + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; + + return result; +} + +#ifdef HAVE__BUILTIN_CTZL + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_rightmost_one64_choose(uint64 word) +{ + if (pg_lzcnt_available()) + pg_rightmost_one64 = pg_rightmost_one64_abm; + else + pg_rightmost_one64 = pg_rightmost_one64_slow; + + return pg_rightmost_one64(word); +} + +int +pg_rightmost_one64_abm(uint64 word) +{ + return __builtin_ctzl(word); +} + +int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_choose; + +#else + +int (*pg_rightmost_one64) (uint64 word) = pg_rightmost_one64_slow; + +#endif /* HAVE__BUILTIN_CTZL */ + +/* + * pg_rightmost_one64_slow + * Returns the number of trailing 0-bits in word, starting at the least + * significant bit position. word must not be 0. + */ +int +pg_rightmost_one64_slow(uint64 word) +{ + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += rightmost_one_pos[word & 255]; + + return result; +} + +#ifdef HAVE__BUILTIN_CLZ + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_leftmost_one32_choose(uint32 word) +{ + if (pg_lzcnt_available()) + pg_leftmost_one32 = pg_leftmost_one32_abm; + else + pg_leftmost_one32 = pg_leftmost_one32_slow; + + return pg_leftmost_one32(word); +} + +int +pg_leftmost_one32_abm(uint32 word) +{ + return 31 - __builtin_clz(word); +} + +int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_choose; + +#else + +int (*pg_leftmost_one32) (uint32 word) = pg_leftmost_one32_slow; + +#endif /* HAVE__BUILTIN_CLZ */ + +/* + * pg_leftmost_one32_slow + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +int +pg_leftmost_one32_slow(uint32 word) +{ + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +} + +#ifdef HAVE__BUILTIN_CLZL + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static int +pg_leftmost_one64_choose(uint64 word) +{ + if (pg_lzcnt_available()) + pg_leftmost_one64 = pg_leftmost_one64_abm; + else + pg_leftmost_one64 = pg_leftmost_one64_slow; + + return pg_leftmost_one64(word); +} + +int +pg_leftmost_one64_abm(uint64 word) +{ + return 63 - __builtin_clzl(word); +} + +int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_choose; + +#else + +int (*pg_leftmost_one64) (uint64 word) = pg_leftmost_one64_slow; + +#endif /* HAVE__BUILTIN_CLZL */ + +/* + * pg_leftmost_one64_slow + * Returns the 0-based position of the most significant set bit in word + * measured from the least significant bit. word must not be 0. + */ +int +pg_leftmost_one64_slow(uint64 word) +{ + int shift = 64 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + leftmost_one_pos[(word >> shift) & 255]; +} diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 76bd81e9bf..862471266a 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -751,6 +751,24 @@ /* Define to 1 if your compiler understands __builtin_$op_overflow. */ #undef HAVE__BUILTIN_OP_OVERFLOW +/* Define to 1 if your compiler understands __builtin_popcount. */ +#undef HAVE__BUILTIN_POPCOUNT + +/* Define to 1 if your compiler understands __builtin_popcountl. */ +#undef HAVE__BUILTIN_POPCOUNTL + +/* Define to 1 if your compiler understands __builtin_ctz. */ +#undef HAVE__BUILTIN_CTZ + +/* Define to 1 if your compiler understands __builtin_ctzl. */ +#undef HAVE__BUILTIN_CTZL + +/* Define to 1 if your compiler understands __builtin_clz. */ +#undef HAVE__BUILTIN_CLZ + +/* Define to 1 if your compiler understands __builtin_clzl. */ +#undef HAVE__BUILTIN_CLZL + /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index de0c4d9997..98639d7f83 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -590,6 +590,24 @@ /* Define to 1 if your compiler understands __builtin_$op_overflow. */ /* #undef HAVE__BUILTIN_OP_OVERFLOW */ +/* Define to 1 if your compiler understands __builtin_popcount. */ +/* #undef HAVE__BUILTIN_POPCOUNT */ + +/* Define to 1 if your compiler understands __builtin_popcountl. */ +/* #undef HAVE__BUILTIN_POPCOUNTL */ + +/* Define to 1 if your compiler understands __builtin_ctz. */ +/* #undef HAVE__BUILTIN_CTZ */ + +/* Define to 1 if your compiler understands __builtin_ctzl. */ +/* #undef HAVE__BUILTIN_CTZL */ + +/* Define to 1 if your compiler understands __builtin_clz. */ +/* #undef HAVE__BUILTIN_CLZ */ + +/* Define to 1 if your compiler understands __builtin_clzl. */ +/* #undef HAVE__BUILTIN_CLZL */ + /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */ diff --git a/src/include/utils/bitutils.h b/src/include/utils/bitutils.h new file mode 100644 index 0000000000..96ce102bc9 --- /dev/null +++ b/src/include/utils/bitutils.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------ - + * + * bitutils.h + * miscellaneous functions bit-wise operations. + * + * + * Portions Copyright(c) 2019, PostgreSQL Global Development Group + * + * src/include/utils/bitutils.h + * + *------------------------------------------------------------------------ - + */ + +#ifndef BITUTILS_H +#define BITUTILS_H + +extern int pg_popcount32_slow(uint32 word); +#ifdef HAVE__BUILTIN_POPCOUNT +extern int pg_popcount32_sse42(uint32 word); +#endif +extern int pg_popcount64_slow(uint64 word); +#ifdef HAVE__BUILTIN_POPCOUNTL +extern int pg_popcount64_sse42(uint64 word); +#endif + +extern int pg_rightmost_one32_slow(uint32 word); +#ifdef HAVE__BUILTIN_CTZ +extern int pg_rightmost_one32_abm(uint32 word); +#endif +extern int pg_rightmost_one64_slow(uint64 word); +#ifdef HAVE__BUILTIN_CTZL +extern int pg_rightmost_one64_abm(uint64 word); +#endif + +extern int pg_leftmost_one32_slow(uint32 word); +#ifdef HAVE__BUILTIN_CLZ +extern int pg_leftmost_one32_abm(uint32 word); +#endif +extern int pg_leftmost_one64_slow(uint64 word); +#ifdef HAVE__BUILTIN_CLZL +extern int pg_leftmost_one64_abm(uint64 word); +#endif + + +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); +extern int (*pg_rightmost_one32) (uint32 word); +extern int (*pg_rightmost_one64) (uint64 word); +extern int (*pg_leftmost_one32) (uint32 word); +extern int (*pg_leftmost_one64) (uint64 word); + +#endif /* BITUTILS_H */ -- 2.16.2.windows.1