From 16693caca491f9d52cff463dfc85bbbd54df9064 Mon Sep 17 00:00:00 2001 From: Paul Amonson Date: Tue, 23 Jul 2024 11:23:23 -0700 Subject: [PATCH] [Refactor] Move all HW checks to common file. Signed-off-by: Paul Amonson --- configure | 12 +- configure.ac | 2 +- src/include/port/pg_bitutils.h | 1 - src/include/port/pg_hw_feat_check.h | 33 ++++++ src/port/Makefile | 9 +- src/port/meson.build | 2 +- src/port/pg_bitutils.c | 22 +--- src/port/pg_crc32c_sse42_choose.c | 27 +---- src/port/pg_hw_feat_check.c | 159 +++++++++++++++++++++++++++ src/port/pg_popcount_avx512_choose.c | 102 ----------------- 10 files changed, 208 insertions(+), 161 deletions(-) create mode 100644 src/include/port/pg_hw_feat_check.h create mode 100644 src/port/pg_hw_feat_check.c delete mode 100644 src/port/pg_popcount_avx512_choose.c diff --git a/configure b/configure index 2abbeb2794..5be6fb4d5f 100755 --- a/configure +++ b/configure @@ -14868,7 +14868,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -14914,7 +14914,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 
1 : -1]; @@ -14938,7 +14938,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -14983,7 +14983,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -15007,7 +15007,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 
1 : -1]; @@ -17674,7 +17674,7 @@ fi fi if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then - PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o" + PG_POPCNT_OBJS="pg_popcount_avx512.o" $as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index c46ed2c591..2e64f53898 100644 --- a/configure.ac +++ b/configure.ac @@ -2090,7 +2090,7 @@ if test x"$host_cpu" = x"x86_64"; then PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512bw]) fi if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then - PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o" + PG_POPCNT_OBJS="pg_popcount_avx512.o" AC_DEFINE(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 popcount instructions with a runtime check.]) fi fi diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 4d88478c9c..263f27930d 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -312,7 +312,6 @@ extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int * files. */ #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK -extern bool pg_popcount_avx512_available(void); extern uint64 pg_popcount_avx512(const char *buf, int bytes); extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask); #endif diff --git a/src/include/port/pg_hw_feat_check.h b/src/include/port/pg_hw_feat_check.h new file mode 100644 index 0000000000..58be900b54 --- /dev/null +++ b/src/include/port/pg_hw_feat_check.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * pg_hw_feat_check.h + * Miscellaneous functions for checking for hardware features at runtime. 
+ * + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * src/include/port/pg_hw_feat_check.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_HW_FEAT_CHECK_H +#define PG_HW_FEAT_CHECK_H + +/* + * Test to see if all hardware features required by SSE 4.2 crc32c (64 bit) + * are available. + */ +extern PGDLLIMPORT bool pg_crc32c_sse42_available(void); + +/* + * Test to see if all hardware features required by SSE 4.1 POPCNT (64 bit) + * are available. + */ +extern PGDLLIMPORT bool pg_popcount_available(void); + +/* + * Test to see if all hardware features required by AVX-512 POPCNT are + * available. + */ +extern PGDLLIMPORT bool pg_popcount_avx512_available(void); +#endif /* PG_HW_FEAT_CHECK_H */ diff --git a/src/port/Makefile b/src/port/Makefile index db7c02117b..b18710eeef 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -45,6 +45,7 @@ OBJS = \ noblock.o \ path.o \ pg_bitutils.o \ + pg_hw_feat_check.o \ pg_strong_random.o \ pgcheckdir.o \ pgmkdirp.o \ @@ -93,10 +94,10 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC) -# all versions of pg_popcount_avx512_choose.o need CFLAGS_XSAVE -pg_popcount_avx512_choose.o: CFLAGS+=$(CFLAGS_XSAVE) -pg_popcount_avx512_choose_shlib.o: CFLAGS+=$(CFLAGS_XSAVE) -pg_popcount_avx512_choose_srv.o: CFLAGS+=$(CFLAGS_XSAVE) +# all versions of pg_hw_feat_check.o need CFLAGS_XSAVE +pg_hw_feat_check.o: CFLAGS+=$(CFLAGS_XSAVE) +pg_hw_feat_check_shlib.o: CFLAGS+=$(CFLAGS_XSAVE) +pg_hw_feat_check_srv.o: CFLAGS+=$(CFLAGS_XSAVE) # all versions of pg_popcount_avx512.o need CFLAGS_POPCNT pg_popcount_avx512.o: CFLAGS+=$(CFLAGS_POPCNT) diff --git a/src/port/meson.build b/src/port/meson.build index ff54b7b53e..f8cafc4bd4 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -86,7 +86,7 @@ replace_funcs_pos = [ ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], 
['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_popcount_avx512', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'], - ['pg_popcount_avx512_choose', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'xsave'], + ['pg_hw_feat_check', 'HAVE_XSAVE_INTRINSICS', 'xsave'], # arm / aarch64 ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'], diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 87f56e82b8..b2823d5732 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -20,7 +20,7 @@ #endif #include "port/pg_bitutils.h" - +#include "port/pg_hw_feat_check.h" /* * Array giving the position of the left-most set bit for each possible @@ -109,7 +109,6 @@ static uint64 pg_popcount_slow(const char *buf, int bytes); static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask); #ifdef TRY_POPCNT_FAST -static bool pg_popcount_available(void); static int pg_popcount32_choose(uint32 word); static int pg_popcount64_choose(uint64 word); static uint64 pg_popcount_choose(const char *buf, int bytes); @@ -127,25 +126,6 @@ uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) #ifdef TRY_POPCNT_FAST -/* - * Return true if CPUID indicates that the POPCNT instruction is available. - */ -static bool -pg_popcount_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} - /* * These functions get called on the first call to pg_popcount32 etc. 
* They detect whether we can use the asm implementations, and replace diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 56d600f3a9..36e6949362 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -20,31 +20,8 @@ #include "c.h" -#ifdef HAVE__GET_CPUID -#include -#endif - -#ifdef HAVE__CPUID -#include -#endif - #include "port/pg_crc32c.h" - -static bool -pg_crc32c_sse42_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ -} +#include "port/pg_hw_feat_check.h" /* * This gets called on the first call. It replaces the function pointer @@ -61,4 +38,4 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) return pg_comp_crc32c(crc, data, len); } -pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_hw_feat_check.c b/src/port/pg_hw_feat_check.c new file mode 100644 index 0000000000..455005add5 --- /dev/null +++ b/src/port/pg_hw_feat_check.c @@ -0,0 +1,159 @@ +/*------------------------------------------------------------------------- + * + * pg_hw_feat_check.c + * Test for hardware features at runtime on x86_64 platforms. 
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_hw_feat_check.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+#include "port/pg_hw_feat_check.h"
+
+/* Index names for the four CPUID result registers, to avoid magic subscripts. */
+typedef unsigned int exx_t;
+typedef enum
+{
+	EAX = 0,
+	EBX = 1,
+	ECX = 2,
+	EDX = 3
+} reg_name;
+
+/*
+ * Is bit 'bit' (0..31) set in register 'ex' of the CPUID result 'regs'?
+ */
+static inline bool
+is_bit_set_in_exx(const exx_t *regs, reg_name ex, int bit)
+{
+	return (regs[ex] & (1U << bit)) != 0;	/* 1U: shifting into bit 31 is OK */
+}
+
+/*
+ * Execute CPUID for 'leaf' and store the result in exx[EAX] .. exx[EDX].
+ */
+static inline void
+pg_getcpuid(unsigned int leaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(leaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, leaf);			/* honor the requested leaf */
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * Execute CPUID for 'leaf' / 'subleaf' and store the result in exx[EAX..EDX].
+ */
+static inline void
+pg_getcpuidex(unsigned int leaf, unsigned int subleaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(leaf, subleaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, leaf, subleaf);	/* honor the requested leaf and subleaf */
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * Does CPUID report OSXSAVE (leaf 1, ECX bit 27)?  Without XSAVE intrinsics
+ * in this build the answer is always false.
+ */
+static inline bool
+osxsave_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	exx_t		exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+	return is_bit_set_in_exx(exx, ECX, 27);	/* osxsave */
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that osxsave_available() returns true
+ * before calling this.
+ */
+static inline bool
+zmm_regs_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	return (_xgetbv(0) & 0xe6) == 0xe6;	/* bits 1, 2, 5, 6 and 7 all set */
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+	exx_t		exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuidex(7, 0, exx);
+	return is_bit_set_in_exx(exx, ECX, 14) &&	/* avx512-vpopcntdq */
+		is_bit_set_in_exx(exx, EBX, 30);		/* avx512-bw */
+}
+
+/*
+ * Return true if CPUID indicates that the POPCNT instruction is available.
+ */
+bool
+pg_popcount_available(void)
+{
+	exx_t		exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+	return is_bit_set_in_exx(exx, ECX, 23);	/* POPCNT */
+}
+
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.  The call to osxsave_available() MUST precede
+ * the call to zmm_regs_available(), per the NB on that function; the
+ * short-circuiting && chain below guarantees that order.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+	return osxsave_available() &&
+		zmm_regs_available() &&
+		avx512_popcnt_available();
+}
+
+/*
+ * Does CPUID say there's support for SSE 4.2?
+ */
+bool
+pg_crc32c_sse42_available(void)
+{
+	exx_t		exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+	return is_bit_set_in_exx(exx, ECX, 20);	/* SSE 4.2 */
+}
diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
deleted file mode 100644
index b37107803a..0000000000
--- a/src/port/pg_popcount_avx512_choose.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_popcount_avx512_choose.c
- *	  Test whether we can use the AVX-512 pg_popcount() implementation.
- * - * Copyright (c) 2024, PostgreSQL Global Development Group - * - * IDENTIFICATION - * src/port/pg_popcount_avx512_choose.c - * - *------------------------------------------------------------------------- - */ -#include "c.h" - -#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) -#include -#endif - -#ifdef HAVE_XSAVE_INTRINSICS -#include -#endif - -#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) -#include -#endif - -#include "port/pg_bitutils.h" - -/* - * It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to - * use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on - * the function pointers that are only used when TRY_POPCNT_FAST is set. - */ -#ifdef TRY_POPCNT_FAST - -/* - * Does CPUID say there's support for XSAVE instructions? - */ -static inline bool -xsave_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 27)) != 0; /* osxsave */ -} - -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that xsave_available() returns true - * before calling this. - */ -static inline bool -zmm_regs_available(void) -{ -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif -} - -/* - * Does CPUID say there's support for AVX-512 popcount and byte-and-word - * instructions? 
- */ -static inline bool -avx512_popcnt_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ - (exx[1] & (1 << 30)) != 0; /* avx512-bw */ -} - -/* - * Returns true if the CPU supports the instructions required for the AVX-512 - * pg_popcount() implementation. - */ -bool -pg_popcount_avx512_available(void) -{ - return xsave_available() && - zmm_regs_available() && - avx512_popcnt_available(); -} - -#endif /* TRY_POPCNT_FAST */ -- 2.34.1