From 4422558d6ce777bd46283ac772f5b59f67d0011f Mon Sep 17 00:00:00 2001
From: Chiranmoy Bhattacharya
Date: Tue, 10 Dec 2024 13:53:20 +0530
Subject: [PATCH v2] SVE support for popcount and popcount masked

---
 config/c-compiler.m4              |  41 +++++++++
 configure                         |  93 +++++++++++++++++++++
 configure.ac                      |  16 ++++
 meson.build                       |  31 +++++++
 src/Makefile.global.in            |   4 +
 src/include/pg_config.h.in        |   3 +
 src/include/port/pg_bitutils.h    |  14 ++++
 src/makefiles/meson.build         |   3 +-
 src/port/Makefile                 |  11 +++
 src/port/meson.build              |   4 +-
 src/port/pg_bitutils.c            |  10 ++-
 src/port/pg_popcount_sve.c        | 134 ++++++++++++++++++++++++++++++
 src/port/pg_popcount_sve_choose.c |  32 +++++++
 13 files changed, 393 insertions(+), 3 deletions(-)
 create mode 100644 src/port/pg_popcount_sve.c
 create mode 100644 src/port/pg_popcount_sve_choose.c

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index e112fd45d4..eabe68a773 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -704,3 +704,44 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_AVX512_POPCNT_INTRINSICS
+
+# PGAC_ARM_SVE_POPCNT_INTRINSICS
+# ------------------------------
+# Check if the compiler supports the ARM SVE popcount instructions using the
+# svdup_u64, svptrue_b64, svcnt_z, svcnt_x, svadd_x, svaddv, and svwhilelt_b8
+# intrinsic functions.  The result is cached separately per set of flags.
+#
+# Optional compiler flags can be passed as arguments (e.g., -march=armv8-a+sve).
+AC_DEFUN([PGAC_ARM_SVE_POPCNT_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm_sve_popcnt_intrinsics_$1])])dnl
+  AC_CACHE_CHECK([for svdup_u64 and other intrinsics with CFLAGS=$1],
+    [Ac_cachevar],
+    [
+      pgac_save_CFLAGS=$CFLAGS
+      CFLAGS="$pgac_save_CFLAGS $1"
+
+      AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h>],
+        [svbool_t predicate = svptrue_b64();
+         svuint64_t segment = svdup_u64(0), accum = svdup_u64(0);
+         const char *buf = NULL; /* Simulating a buffer pointer */
+         uint32_t num_vals_segment = svlen_u64(segment);
+
+         /* Using intrinsics as per the code */
+         predicate = svwhilelt_b8(0, 128);
+         segment = svld1(predicate, (const uint64_t *)buf);
+         accum = svadd_x(predicate, accum, svcnt_x(predicate, segment));
+         uint64_t popcnt = svaddv(predicate, accum);
+
+         /* Return computed value, to prevent the above being optimized away */
+         return popcnt;])],
+        [Ac_cachevar=yes],
+        [Ac_cachevar=no])
+      CFLAGS="$pgac_save_CFLAGS"
+    ])
+
+  if test x"$Ac_cachevar" = x"yes"; then
+    CFLAGS_POPCNT_ARM="$1"
+    pgac_arm_sve_popcnt_intrinsics=yes
+  fi
+undefine([Ac_cachevar])dnl
+])
diff --git a/configure b/configure
index 518c33b73a..a3e41459d5 100755
--- a/configure
+++ b/configure
@@ -647,6 +647,8 @@ MSGFMT_FLAGS
 MSGFMT
 PG_CRC32C_OBJS
 CFLAGS_CRC
+PG_POPCNT_OBJS_ARM
+CFLAGS_POPCNT_ARM
 LIBOBJS
 OPENSSL
 ZSTD
@@ -17159,6 +17161,97 @@ $as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
   fi
 fi
 
+# Check for ARM SVE popcount intrinsics
+CFLAGS_POPCNT_ARM=""
+PG_POPCNT_OBJS_ARM=""
+
+if test x"$host_cpu" = x"aarch64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for svcnt_u64 with CFLAGS=" >&5
+$as_echo_n "checking for svcnt_u64 with CFLAGS=... " >&6; }
+if ${pgac_cv_arm_sve_popcnt_intrinsics_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+  CFLAGS="$pgac_save_CFLAGS "
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_sve.h>
+int
+main ()
+{
+  svbool_t predicate = svptrue_b64();
+  svuint64_t segment, accum = svdup_u64(0);
+  uint64_t numVals = svlen_u64(segment);
+
+  svuint64_t counts = svcnt_u64_z(predicate, segment);
+  accum = svadd_u64_m(predicate, accum, counts);
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm_sve_popcnt_intrinsics_=yes
+else
+  pgac_cv_arm_sve_popcnt_intrinsics_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm_sve_popcnt_intrinsics_" >&5
+$as_echo "$pgac_cv_arm_sve_popcnt_intrinsics_" >&6; }
+if test x"$pgac_cv_arm_sve_popcnt_intrinsics_" = x"yes"; then
+  CFLAGS_POPCNT_ARM=""
+  pgac_arm_sve_popcnt_intrinsics=yes
+fi
+
+if test x"$pgac_arm_sve_popcnt_intrinsics" != x"yes"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for svcnt_u64 with CFLAGS=-march=armv8-a+sve" >&5
+$as_echo_n "checking for svcnt_u64 with CFLAGS=-march=armv8-a+sve... " >&6; }
+if ${pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+  CFLAGS="$pgac_save_CFLAGS -march=armv8-a+sve"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_sve.h>
+int
+main ()
+{
+  svbool_t predicate = svptrue_b64();
+  svuint64_t segment, accum = svdup_u64(0);
+  uint64_t numVals = svlen_u64(segment);
+
+  svuint64_t counts = svcnt_u64_z(predicate, segment);
+  accum = svadd_u64_m(predicate, accum, counts);
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve=yes
+else
+  pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve" >&5
+$as_echo "$pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve" >&6; }
+if test x"$pgac_cv_arm_sve_popcnt_intrinsics__march_armv8_a_sve" = x"yes"; then
+  CFLAGS_POPCNT_ARM="-march=armv8-a+sve"
+  pgac_arm_sve_popcnt_intrinsics=yes
+fi
+
+fi
+if test x"$pgac_arm_sve_popcnt_intrinsics" = x"yes"; then
+  PG_POPCNT_OBJS_ARM="pg_popcount_sve.o pg_popcount_sve_choose.o"
+
+  $as_echo "#define USE_SVE_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+fi
+fi
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32" >&5
diff --git a/configure.ac b/configure.ac
index 247ae97fa4..1ea314190b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2021,6 +2021,22 @@ if test x"$host_cpu" = x"x86_64"; then
   fi
 fi
 
+# Check for ARM SVE popcount intrinsics
+CFLAGS_POPCNT_ARM=""
+PG_POPCNT_OBJS_ARM=""
+if test x"$host_cpu" = x"aarch64"; then
+  PGAC_ARM_SVE_POPCNT_INTRINSICS([])
+  if test x"$pgac_arm_sve_popcnt_intrinsics" != x"yes"; then
+    PGAC_ARM_SVE_POPCNT_INTRINSICS([-march=armv8-a+sve])
+  fi
+  if test x"$pgac_arm_sve_popcnt_intrinsics" = x"yes"; then
+    PG_POPCNT_OBJS_ARM="pg_popcount_sve.o pg_popcount_sve_choose.o"
+    AC_DEFINE(USE_SVE_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use SVE popcount instructions with a runtime check.])
+  fi
+fi
+AC_SUBST(CFLAGS_POPCNT_ARM)
+AC_SUBST(PG_POPCNT_OBJS_ARM)
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 PGAC_SSE42_CRC32_INTRINSICS()
diff --git a/meson.build b/meson.build
index e5ce437a5c..6c936f2f2b 100644
--- a/meson.build
+++ b/meson.build
@@ -2191,6 +2191,37 @@ int main(void)
 endif
 
 
+###############################################################
+# Check for the availability of ARM SVE popcount intrinsics.
+###############################################################
+
+cflags_popcnt_arm = []
+if host_cpu == 'aarch64'
+
+  prog = '''
+#include <arm_sve.h>
+
+int main(void)
+{
+    const svuint64_t val = svdup_u64(0xFFFFFFFFFFFFFFFF);
+    const svuint64_t popcnt = svcnt_x(svptrue_b64(), val);
+    /* return computed value, to prevent the above being optimized away */
+    return svaddv(svptrue_b64(), popcnt) == 0;
+}
+'''
+
+  if cc.links(prog, name: 'ARM SVE popcount without -march=armv8-a+sve',
+        args: test_c_args)
+    cdata.set('USE_SVE_POPCNT_WITH_RUNTIME_CHECK', 1)
+  elif cc.links(prog, name: 'ARM SVE popcount with -march=armv8-a+sve',
+        args: test_c_args + ['-march=armv8-a+sve'])
+    cdata.set('USE_SVE_POPCNT_WITH_RUNTIME_CHECK', 1)
+    cflags_popcnt_arm += ['-march=armv8-a+sve']
+  endif
+
+endif
+
+
 ###############################################################
 # Select CRC-32C implementation.
 #
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index eac3d00121..2c32dfab5e 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -262,6 +262,7 @@ CFLAGS_SL_MODULE = @CFLAGS_SL_MODULE@
 CXXFLAGS_SL_MODULE = @CXXFLAGS_SL_MODULE@
 CFLAGS_UNROLL_LOOPS = @CFLAGS_UNROLL_LOOPS@
 CFLAGS_VECTORIZE = @CFLAGS_VECTORIZE@
+CFLAGS_POPCNT_ARM = @CFLAGS_POPCNT_ARM@
 CFLAGS_CRC = @CFLAGS_CRC@
 PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
 PERMIT_MISSING_VARIABLE_DECLARATIONS = @PERMIT_MISSING_VARIABLE_DECLARATIONS@
@@ -770,6 +771,9 @@ LIBOBJS = @LIBOBJS@
 # files needed for the chosen CRC-32C implementation
 PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
 
+# files needed for the chosen popcount implementation
+PG_POPCNT_OBJS_ARM = @PG_POPCNT_OBJS_ARM@
+
 LIBS := -lpgcommon -lpgport $(LIBS)
 
 # to make ws2_32.lib the last library
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 07b2f798ab..29c32bbbbe 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -648,6 +648,9 @@
 /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
 #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
+/* Define to 1 to use SVE popcount instructions with a runtime check. */
+#undef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
+
 /* Define to 1 to build with Bonjour support. (--with-bonjour) */
 #undef USE_BONJOUR
 
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index a3cad46afe..57ebfddb7d 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -298,6 +298,14 @@ pg_ceil_log2_64(uint64 num)
 #endif
 #endif
 
+/*
+ * On AArch64 builds, try using SVE popcount instructions, but only if
+ * we can verify that the CPU supports it via a runtime check.
+ */
+#if defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
+#define TRY_POPCNT_FAST 1
+#endif
+
 #ifdef TRY_POPCNT_FAST
 /* Attempt to use the POPCNT instruction, but perform a runtime check first */
 extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
@@ -317,6 +325,12 @@ extern uint64 pg_popcount_avx512(const char *buf, int bytes);
 extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
 #endif
 
+#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
+extern bool pg_popcount_sve_available(void);
+extern uint64 pg_popcount_sve(const char *buf, int bytes);
+extern uint64 pg_popcount_masked_sve(const char *buf, int bytes, bits8 mask);
+#endif
+
 #else
 /* Use a portable implementation -- no need for a function pointer. */
 extern int pg_popcount32(uint32 word);
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index aba7411a1b..c0207426c2 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -102,6 +102,7 @@ pgxs_kv = {
     ' '.join(cflags_no_missing_var_decls),
 
   'CFLAGS_CRC': ' '.join(cflags_crc),
+  'CFLAGS_POPCNT_ARM': ' '.join(cflags_popcnt_arm),
   'CFLAGS_UNROLL_LOOPS': ' '.join(unroll_loops_cflags),
   'CFLAGS_VECTORIZE': ' '.join(vectorize_cflags),
 
@@ -179,7 +180,7 @@ pgxs_empty = [
   'WANTED_LANGUAGES',
 
   # Not needed because we don't build the server / PLs with the generated makefile
-  'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS',
+  'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_POPCNT_OBJS', 'PG_POPCNT_OBJS_ARM', 'TAS',
   'PG_TEST_EXTRA',
   'DTRACEFLAGS', # only server has dtrace probes
 
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c22431951..2e04ea4d5a 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -38,6 +38,7 @@ LIBS += $(PTHREAD_LIBS)
 OBJS = \
 	$(LIBOBJS) \
 	$(PG_CRC32C_OBJS) \
+	$(PG_POPCNT_OBJS_ARM) \
 	bsearch_arg.o \
 	chklocale.o \
 	inet_net_ntop.o \
@@ -87,6 +88,16 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
 
+# all versions of pg_popcount_sve.o need CFLAGS_POPCNT_ARM
+pg_popcount_sve.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+pg_popcount_sve_shlib.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+pg_popcount_sve_srv.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+
+# all versions of pg_popcount_sve_choose.o need CFLAGS_POPCNT_ARM
+pg_popcount_sve_choose.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+pg_popcount_sve_choose_shlib.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+pg_popcount_sve_choose_srv.o: CFLAGS+=$(CFLAGS_POPCNT_ARM)
+
 #
 # Shared library versions of object files
 #
diff --git a/src/port/meson.build b/src/port/meson.build
index c5bceed9cd..21d686a26e 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -91,6 +91,8 @@ replace_funcs_pos = [
   ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
   ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
   ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
+  ['pg_popcount_sve', 'USE_SVE_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'],
+  ['pg_popcount_sve_choose', 'USE_SVE_POPCNT_WITH_RUNTIME_CHECK'],
 
   # loongarch
   ['pg_crc32c_loongarch', 'USE_LOONGARCH_CRC32C'],
@@ -99,7 +101,7 @@ replace_funcs_pos = [
   ['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
 ]
 
-pgport_cflags = {'crc': cflags_crc}
+pgport_cflags = {'crc': cflags_crc, 'popcnt': cflags_popcnt + cflags_popcnt_arm}
 pgport_sources_cflags = {'crc': []}
 
 foreach f : replace_funcs_neg
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index c8399981ee..6b2e6b3794 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -135,7 +135,9 @@ pg_popcount_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
-#if defined(HAVE__GET_CPUID)
+#if defined(__aarch64__)
+	return false;				/* cpuid not available in __aarch64__ */
+#elif defined(HAVE__GET_CPUID)
 	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
 	__cpuid(exx, 1);
@@ -176,6 +178,12 @@ choose_popcount_functions(void)
 		pg_popcount_optimized = pg_popcount_avx512;
 		pg_popcount_masked_optimized = pg_popcount_masked_avx512;
 	}
+#elif defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
+	if (pg_popcount_sve_available())
+	{
+		pg_popcount_optimized = pg_popcount_sve;
+		pg_popcount_masked_optimized = pg_popcount_masked_sve;
+	}
 #endif
 }
 
diff --git a/src/port/pg_popcount_sve.c b/src/port/pg_popcount_sve.c
new file mode 100644
index 0000000000..c2a3a4cba0
--- /dev/null
+++ b/src/port/pg_popcount_sve.c
@@ -0,0 +1,134 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_popcount_sve.c
+ *	  Holds the SVE pg_popcount() implementation.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_popcount_sve.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "port/pg_bitutils.h"
+
+#include <arm_sve.h>
+
+#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
+
+/*
+ * pg_popcount_sve
+ *		Returns the number of 1-bits in the first "bytes" bytes of buf
+ */
+uint64
+pg_popcount_sve(const char *buf, int bytes)
+{
+	svbool_t	pred;
+	svuint64_t	vec64,
+				accum1 = svdup_u64(0),
+				accum2 = svdup_u64(0);
+	uint32		i = 0,
+				vec_len = svcntb(),
+				pre_align,
+				loop_bytes;
+	uint64		popcnt = 0;
+	const char *aligned = (const char *) TYPEALIGN_DOWN(sizeof(uint64_t), buf);
+
+	/*
+	 * For smaller inputs, aligning the buffer degrades the performance.
+	 * Therefore, we align the buffers only when the input size is sufficiently large.
+	 */
+	if (aligned != buf && bytes > 4 * vec_len)
+	{
+		pre_align = aligned + sizeof(uint64_t) - buf;
+		pred = svwhilelt_b8(0U, pre_align);
+		popcnt = svaddv(pred, svcnt_z(pred, svld1(pred, (const uint8 *) buf)));
+		buf += pre_align;
+		bytes -= pre_align;
+	}
+
+	pred = svptrue_b64();
+	loop_bytes = bytes - bytes % (vec_len * 2);
+
+	/* Process two full vectors per iteration; loop_bytes is a multiple of that */
+	for (; i < loop_bytes; i += vec_len * 2)
+	{
+		vec64 = svld1(pred, (const uint64 *) (buf + i));
+		accum1 = svadd_x(pred, accum1, svcnt_x(pred, vec64));
+		vec64 = svld1(pred, (const uint64 *) (buf + i + vec_len));
+		accum2 = svadd_x(pred, accum2, svcnt_x(pred, vec64));
+	}
+
+	popcnt += svaddv(pred, svadd_x(pred, accum1, accum2)); /* reduce the accumulators */
+
+	/* Process the remaining bytes (fewer than two full vectors) */
+	for (; i < bytes; i += vec_len)
+	{
+		pred = svwhilelt_b8(i, (uint32) bytes);
+		popcnt += svaddv(pred, svcnt_z(pred, svld1(pred, (const uint8 *) (buf + i))));
+	}
+
+	return popcnt;
+}
+
+/*
+ * pg_popcount_masked_sve
+ *		Returns the number of 1-bits in buf after applying the mask to each byte
+ */
+uint64
+pg_popcount_masked_sve(const char *buf, int bytes, bits8 mask)
+{
+	svbool_t	pred;
+	svuint8_t	vec8;
+	svuint64_t	vec64,
+				accum1 = svdup_u64(0),
+				accum2 = svdup_u64(0);
+	uint32		i = 0,
+				vec_len = svcntb(),
+				pre_align,
+				loop_bytes;
+	uint64		popcnt = 0,
+				mask64 = ~UINT64CONST(0) / 0xFF * mask;
+	const char *aligned = (const char *) TYPEALIGN_DOWN(sizeof(uint64_t), buf);
+
+	/*
+	 * For smaller inputs, aligning the buffer degrades the performance.
+	 * Therefore, we align the buffers only when the input size is sufficiently large.
+	 */
+	if (aligned != buf && bytes > 4 * vec_len)
+	{
+		pre_align = aligned + sizeof(uint64_t) - buf;
+		pred = svwhilelt_b8(0U, pre_align);
+		vec8 = svand_n_u8_m(pred, svld1(pred, (const uint8 *) buf), mask); /* load and mask */
+		popcnt = svaddv(pred, svcnt_z(pred, vec8));
+		buf += pre_align;
+		bytes -= pre_align;
+	}
+
+	pred = svptrue_b64();
+	loop_bytes = bytes - bytes % (vec_len * 2);
+
+	/* Process two full vectors per iteration; loop_bytes is a multiple of that */
+	for (; i < loop_bytes; i += vec_len * 2)
+	{
+		vec64 = svand_n_u64_x(pred, svld1(pred, (const uint64 *) (buf + i)), mask64);
+		accum1 = svadd_x(pred, accum1, svcnt_x(pred, vec64));
+		vec64 = svand_n_u64_x(pred, svld1(pred, (const uint64 *) (buf + i + vec_len)), mask64);
+		accum2 = svadd_x(pred, accum2, svcnt_x(pred, vec64));
+	}
+
+	popcnt += svaddv(pred, svadd_x(pred, accum1, accum2)); /* reduce the accumulators */
+
+	/* Process the remaining bytes (fewer than two full vectors) */
+	for (; i < bytes; i += vec_len)
+	{
+		pred = svwhilelt_b8(i, (uint32) bytes);
+		vec8 = svand_n_u8_m(pred, svld1(pred, (const uint8 *) (buf + i)), mask);
+		popcnt += svaddv(pred, svcnt_z(pred, vec8));
+	}
+
+	return popcnt;
+}
+
+#endif							/* USE_SVE_POPCNT_WITH_RUNTIME_CHECK */
diff --git a/src/port/pg_popcount_sve_choose.c b/src/port/pg_popcount_sve_choose.c
new file mode 100644
index 0000000000..5f4e164f9c
--- /dev/null
+++ b/src/port/pg_popcount_sve_choose.c
@@ -0,0 +1,32 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_popcount_sve_choose.c
+ *	  Test whether we can use the SVE pg_popcount() implementation.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_popcount_sve_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "port/pg_bitutils.h"
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
+
+/*
+ * Returns true if the CPU supports the instructions required for the SVE
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_sve_available(void)
+{
+	unsigned long hwcap = getauxval(AT_HWCAP);	/* get the HWCAP flags */
+	return (hwcap & HWCAP_SVE) != 0;	/* return true if SVE is supported */
+}
+
+#endif							/* USE_SVE_POPCNT_WITH_RUNTIME_CHECK */
-- 
2.34.1