Proposal for enabling auto-vectorization for checksum calculations
Hello,
This patch enables more compiler autovectorization for the checksum
calculations.
This code is particularly well suited for autovectorization, so by just
adding pg_attribute_target and some simple dynamic dispatch logic we
can get improved vectorization.
This gives about a 2x speedup in a synthetic benchmark for
pg_checksum, which is also included as a separate patch file.
Additionally, another 2x performance increase in the synthetic
benchmark with AVX2 can be obtained if N_SUMS was changed to 64.
However, this would change the results of the checksum. This isn't
included in this patch, but I think it is worth considering for the
future.
One additional factor: without explicitly passing some optimization
flag like -O2, the makefile build won't autovectorize any of the code.
However, the meson-based build does this automatically.
Attachments:
v1-0001-Enable-autovectorizing-pg_checksum_block.patchapplication/octet-stream; name=v1-0001-Enable-autovectorizing-pg_checksum_block.patchDownload
From e485110192c208becad0153e51a39514cace1377 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:45 -0800
Subject: [PATCH v1 1/2] Enable autovectorizing pg_checksum_block
---
config/c-compiler.m4 | 31 ++++++
configure | 52 ++++++++++
configure.ac | 9 ++
meson.build | 28 ++++++
src/include/pg_config.h.in | 3 +
src/include/storage/checksum_impl.h | 150 +++++++++++++++++++++++-----
6 files changed, 250 insertions(+), 23 deletions(-)
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 8534cc54c1..143a8a40dd 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -670,6 +670,37 @@ fi
undefine([Ac_cachevar])dnl
])# PGAC_XSAVE_INTRINSICS
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf@<:@sizeof(__m256i)@:>@;
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }],
+ [return avx2_test();])],
+ [Ac_cachevar=yes],
+ [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
# PGAC_AVX512_POPCNT_INTRINSICS
# -----------------------------
# Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index ceeef9b091..87080b8844 100755
--- a/configure
+++ b/configure
@@ -17113,6 +17113,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }
+int
+main ()
+{
+return avx2_test();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pgac_cv_avx2_support=yes
+else
+ pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+
+ if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index d713360f34..7effbf40e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2012,6 +2012,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ PGAC_AVX2_SUPPORT()
+ if test x"$pgac_avx2_support" = x"yes"; then
+ AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index 8e128f4982..4fd96fee1d 100644
--- a/meson.build
+++ b/meson.build
@@ -2159,6 +2159,34 @@ int main(void)
endif
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+ prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+}
+'''
+
+ if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+ cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+ endif
+
+endif
+
###############################################################
# Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 07b2f798ab..0fcd9f05a7 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -645,6 +645,9 @@
/* Define to 1 to build with assertion checks. (--enable-cassert) */
#undef USE_ASSERT_CHECKING
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index da87d61ba5..5ea1f698b5 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -101,12 +101,83 @@
*/
#include "storage/bufpage.h"
+#include "pg_config.h"
/* number of checksums to calculate in parallel */
#define N_SUMS 32
/* prime multiplier of FNV-1a hash */
#define FNV_PRIME 16777619
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0x06) == 0x06;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-2
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+ unsigned int exx[4] = {0, 0, 0, 0};
+ if (!xsave_available() || !ymm_regs_available()) return false;
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[1] & (1 << 5)) != 0; /* avx2 */
+#else
+ return false;
+#endif
+}
+
/* Use a union so that this code is valid under strict aliasing */
typedef union
{
@@ -142,35 +213,68 @@ do { \
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
*/
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
-{
- uint32 sums[N_SUMS];
- uint32 result = 0;
- uint32 i,
- j;
+
+#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+
+#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ uint32 sums[N_SUMS]; \
+ uint32 result = 0; \
+ uint32 i, \
+ j; \
+ \
+ /* ensure that the size is compatible with the algorithm */ \
+ Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
+ \
+ /* initialize partial checksums to their corresponding offsets */ \
+ memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
+ \
+ /* main checksum calculation */ \
+ /* this is the main place that autovectorization occurs */ \
+ for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], page->data[i][j]); \
+ \
+ /* finally add in two rounds of zeroes for additional mixing */ \
+ for (i = 0; i < 2; i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], 0); \
+ \
+ /* xor fold partial checksums together */ \
+ for (i = 0; i < N_SUMS; i++) \
+ result ^= sums[i]; \
+ \
+ return result; \
+}
- /* ensure that the size is compatible with the algorithm */
- Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+/* Declarations are always defined to make dynamic dispatch code simpler */
- /* initialize partial checksums to their corresponding offsets */
- memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+PG_DECLARE_CHECKSUM_ISA(default);
+PG_DECLARE_CHECKSUM_ISA(avx2);
- /* main checksum calculation */
- for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], page->data[i][j]);
+PG_DEFINE_CHECKSUM_ISA(default);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+PG_DEFINE_CHECKSUM_ISA(avx2);
+#endif
- /* finally add in two rounds of zeroes for additional mixing */
- for (i = 0; i < 2; i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], 0);
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page);
- /* xor fold partial checksums together */
- for (i = 0; i < N_SUMS; i++)
- result ^= sums[i];
+static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
- return result;
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page){
+ if (avx2_available()){
+ pg_checksum_block = pg_checksum_block_avx2;
+ }else{
+ pg_checksum_block = pg_checksum_block_default;
+ }
+ return pg_checksum_block(page);
}
/*
--
2.43.0
v1-0002-Benchmark-code-for-postgres-checksums.patchapplication/octet-stream; name=v1-0002-Benchmark-code-for-postgres-checksums.patchDownload
From 0013d718f95abb2ea7997e4dc374d80c5560ff2b Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:27 -0800
Subject: [PATCH v1 2/2] Benchmark code for postgres checksums
---
contrib/meson.build | 1 +
contrib/pg_checksum_bench/meson.build | 23 +++++++++++++
.../pg_checksum_bench--1.0.sql | 8 +++++
contrib/pg_checksum_bench/pg_checksum_bench.c | 34 +++++++++++++++++++
.../pg_checksum_bench.control | 4 +++
.../sql/pg_checksum_bench.sql | 17 ++++++++++
6 files changed, 87 insertions(+)
create mode 100644 contrib/pg_checksum_bench/meson.build
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
diff --git a/contrib/meson.build b/contrib/meson.build
index 1ba73ebd67..d7cad22ac8 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('pg_checksum_bench')
subdir('amcheck')
subdir('auth_delay')
subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 0000000000..32ccd9efa0
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+ 'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+ pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_checksum_bench',
+ '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+ pg_checksum_bench_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+ 'pg_checksum_bench--1.0.sql',
+ 'pg_checksum_bench.control',
+ kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 0000000000..5f13cbe3c5
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+ RETURNS pg_catalog.void
+ AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 0000000000..f40f335ff5
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,34 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "storage/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+ int page_count = PG_GETARG_INT32(0);
+
+ PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage));
+ srand(0);
+ for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){
+ char * byte_ptr = (char *) pages;
+ byte_ptr[i] = rand() % 256;
+ }
+
+ for (int i = 0; i < REPEATS; i++){
+ const PGChecksummablePage * test_page = pages + (i % page_count);
+ volatile uint32 result = pg_checksum_block(test_page);
+ (void) result;
+ }
+
+ pfree((void *) pages);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 0000000000..4a4e2c9363
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 0000000000..4b34769995
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
--
2.43.0
Hello! I'm still trying to figure out those CI failures, I just wanted
to update things.
From my testing, with this patch repeatedly disabling/enabling
checksums is about 12.4% faster on an approximately 15 GB database.
By the way, I'd love it if anyone could help me figure out how to
replicate a CI failure in the Cirrus CI.
I haven't been able to figure out how to test CI runs locally, does
anyone know a good method to do that?
On Thu, May 8, 2025 at 6:57 AM Matthew Sterrett <matthewsterrett2@gmail.com>
wrote:
Hello! I'm still trying to figure out those CI failures, I just wanted
to update things.
From my testing, with this patch repeatedly disabling/enabling
checksums is about 12.4% on an approximately 15 GB database.
By the way, I'd love it if anyone could help me figure out how to
replicate a CI failure in the Cirrus CI.
I haven't been able to figure out how to test CI runs locally, does
anyone know a good method to do that?
Hi Matthew,
Thanks for the patch!
I ran some timing tests:
(without avx2)
Time: 4034.351 ms
SELECT drive_pg_checksum(512);
(with avx2)
Time: 3559.076 ms
SELECT drive_pg_checksum(512);
Also attached two patches that should fix the CI issues.
Best,
Stepan Neretin
On Sat, May 10, 2025 at 6:01 PM Stepan Neretin <slpmcf@gmail.com> wrote:
On Thu, May 8, 2025 at 6:57 AM Matthew Sterrett <
matthewsterrett2@gmail.com> wrote:
Hello! I'm still trying to figure out those CI failures, I just wanted
to update things.
From my testing, with this patch repeatedly disabling/enabling
checksums is about 12.4% on an approximately 15 GB database.
By the way, I'd love it if anyone could help me figure out how to
replicate a CI failure in the Cirrus CI.
I haven't been able to figure out how to test CI runs locally, does
anyone know a good method to do that?
Hi Matthew,
Thanks for the patch!
I ran some timing tests:
(without avx2)
Time: 4034.351 ms
SELECT drive_pg_checksum(512);
(with avx2)
Time: 3559.076 ms
SELECT drive_pg_checksum(512);
Also attached two patches that should fix the CI issues.
Best,
Stepan Neretin
Oops, forgot to attach patches :)
Best,
Stepan Neretin
Attachments:
v2-0001-Enable-autovectorizing-pg_checksum_block.patchtext/x-patch; charset=US-ASCII; name=v2-0001-Enable-autovectorizing-pg_checksum_block.patchDownload
From a6288a4e73d97f3e76e4dabc1e6e6a86478ddbcd Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:45 -0800
Subject: [PATCH v2 1/4] Enable autovectorizing pg_checksum_block
---
config/c-compiler.m4 | 31 ++++++
configure | 52 ++++++++++
configure.ac | 9 ++
meson.build | 28 ++++++
src/include/pg_config.h.in | 3 +
src/include/storage/checksum_impl.h | 150 +++++++++++++++++++++++-----
6 files changed, 250 insertions(+), 23 deletions(-)
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5f3e1d1faf9..4d5bceafe9e 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -710,6 +710,37 @@ fi
undefine([Ac_cachevar])dnl
])# PGAC_XSAVE_INTRINSICS
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf@<:@sizeof(__m256i)@:>@;
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }],
+ [return avx2_test();])],
+ [Ac_cachevar=yes],
+ [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
# PGAC_AVX512_POPCNT_INTRINSICS
# -----------------------------
# Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index 275c67ee67c..5ae2dc00d33 100755
--- a/configure
+++ b/configure
@@ -17711,6 +17711,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }
+int
+main ()
+{
+return avx2_test();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pgac_cv_avx2_support=yes
+else
+ pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+
+ if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index 7ea91d56adb..a1f5eb8021a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2088,6 +2088,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ PGAC_AVX2_SUPPORT()
+ if test x"$pgac_avx2_support" = x"yes"; then
+ AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index 12de5e80c31..513d30a93ff 100644
--- a/meson.build
+++ b/meson.build
@@ -2301,6 +2301,34 @@ int main(void)
endif
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+ prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+}
+'''
+
+ if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+ cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+ endif
+
+endif
+
###############################################################
# Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c3cc9fa856d..b174bcc5f79 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -671,6 +671,9 @@
/* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
#undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index da87d61ba52..5ea1f698b57 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -101,12 +101,83 @@
*/
#include "storage/bufpage.h"
+#include "pg_config.h"
/* number of checksums to calculate in parallel */
#define N_SUMS 32
/* prime multiplier of FNV-1a hash */
#define FNV_PRIME 16777619
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0x06) == 0x06;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-2
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+ unsigned int exx[4] = {0, 0, 0, 0};
+ if (!xsave_available() || !ymm_regs_available()) return false;
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[1] & (1 << 5)) != 0; /* avx2 */
+#else
+ return false;
+#endif
+}
+
/* Use a union so that this code is valid under strict aliasing */
typedef union
{
@@ -142,35 +213,68 @@ do { \
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
*/
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
-{
- uint32 sums[N_SUMS];
- uint32 result = 0;
- uint32 i,
- j;
+
+#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+
+#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ uint32 sums[N_SUMS]; \
+ uint32 result = 0; \
+ uint32 i, \
+ j; \
+ \
+ /* ensure that the size is compatible with the algorithm */ \
+ Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
+ \
+ /* initialize partial checksums to their corresponding offsets */ \
+ memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
+ \
+ /* main checksum calculation */ \
+ /* this is the main place that autovectorization occurs */ \
+ for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], page->data[i][j]); \
+ \
+ /* finally add in two rounds of zeroes for additional mixing */ \
+ for (i = 0; i < 2; i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], 0); \
+ \
+ /* xor fold partial checksums together */ \
+ for (i = 0; i < N_SUMS; i++) \
+ result ^= sums[i]; \
+ \
+ return result; \
+}
- /* ensure that the size is compatible with the algorithm */
- Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+/* Declarations are always defined to make dynamic dispatch code simpler */
- /* initialize partial checksums to their corresponding offsets */
- memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+PG_DECLARE_CHECKSUM_ISA(default);
+PG_DECLARE_CHECKSUM_ISA(avx2);
- /* main checksum calculation */
- for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], page->data[i][j]);
+PG_DEFINE_CHECKSUM_ISA(default);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+PG_DEFINE_CHECKSUM_ISA(avx2);
+#endif
- /* finally add in two rounds of zeroes for additional mixing */
- for (i = 0; i < 2; i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], 0);
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page);
- /* xor fold partial checksums together */
- for (i = 0; i < N_SUMS; i++)
- result ^= sums[i];
+static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
- return result;
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page){
+ if (avx2_available()){
+ pg_checksum_block = pg_checksum_block_avx2;
+ }else{
+ pg_checksum_block = pg_checksum_block_default;
+ }
+ return pg_checksum_block(page);
}
/*
--
2.43.0
v2-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchtext/x-patch; charset=US-ASCII; name=v2-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchDownload
From 7fdbb08a5d6c44a6d584d7b7dc05edfce24d9e26 Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 16:37:13 +0700
Subject: [PATCH v2 2/4] Fix compilation on systems where <immintrin.h> is not
available or inappropriate, such as older GCC versions or non-x86 platforms.
---
src/include/storage/checksum_impl.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 5ea1f698b57..042ee8af120 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -112,7 +112,9 @@
#include <cpuid.h>
#endif
+#ifdef HAVE_XSAVE_INTRINSICS
#include <immintrin.h>
+#endif
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
#include <intrin.h>
@@ -253,10 +255,6 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
}
/* Declarations are always defined to make dynamic dispatch code simpler */
-
-PG_DECLARE_CHECKSUM_ISA(default);
-PG_DECLARE_CHECKSUM_ISA(avx2);
-
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
--
2.43.0
v2-0003-Benchmark-code-for-postgres-checksums.patchtext/x-patch; charset=US-ASCII; name=v2-0003-Benchmark-code-for-postgres-checksums.patchDownload
From fe708e3f869cef91c9ee2a31e087fb49ed463619 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:27 -0800
Subject: [PATCH v2 3/4] Benchmark code for postgres checksums
---
contrib/meson.build | 1 +
contrib/pg_checksum_bench/meson.build | 23 +++++++++++++
.../pg_checksum_bench--1.0.sql | 8 +++++
contrib/pg_checksum_bench/pg_checksum_bench.c | 34 +++++++++++++++++++
.../pg_checksum_bench.control | 4 +++
.../sql/pg_checksum_bench.sql | 17 ++++++++++
6 files changed, 87 insertions(+)
create mode 100644 contrib/pg_checksum_bench/meson.build
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d639..fe5149aadff 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('pg_checksum_bench')
subdir('amcheck')
subdir('auth_delay')
subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 00000000000..32ccd9efa0f
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+ 'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+ pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_checksum_bench',
+ '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+ pg_checksum_bench_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+ 'pg_checksum_bench--1.0.sql',
+ 'pg_checksum_bench.control',
+ kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 00000000000..5f13cbe3c5e
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+ RETURNS pg_catalog.void
+ AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 00000000000..f40f335ff59
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,34 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "storage/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+ int page_count = PG_GETARG_INT32(0);
+
+ PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage));
+ srand(0);
+ for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){
+ char * byte_ptr = (char *) pages;
+ byte_ptr[i] = rand() % 256;
+ }
+
+ for (int i = 0; i < REPEATS; i++){
+ const PGChecksummablePage * test_page = pages + (i % page_count);
+ volatile uint32 result = pg_checksum_block(test_page);
+ (void) result;
+ }
+
+ pfree((void *) pages);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 00000000000..4a4e2c9363c
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 00000000000..4b347699953
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
--
2.43.0
v2-0004-fix-bench-compiling.patchtext/x-patch; charset=US-ASCII; name=v2-0004-fix-bench-compiling.patchDownload
From df72aef0422054f94fe4f407e48d7b6c135a7208 Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 17:57:47 +0700
Subject: [PATCH v2 4/4] fix bench compiling
---
contrib/pg_checksum_bench/pg_checksum_bench.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
index f40f335ff59..0296dcfd259 100644
--- a/contrib/pg_checksum_bench/pg_checksum_bench.c
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -1,5 +1,6 @@
#include "postgres.h"
#include "fmgr.h"
+#include "storage/checksum.h"
#include "storage/checksum_impl.h"
#include <stdio.h>
--
2.43.0
Hello! Thanks for helping me with this.
I'm still trying to figure out what is going on with the Bookworm test
failures. I'm pretty sure this patchset should resolve all the issues
with the macOS build, but I don't think it will help the linux
failures unfortunately.
Show quoted text
On Sat, May 10, 2025 at 4:02 AM Stepan Neretin <slpmcf@gmail.com> wrote:
On Sat, May 10, 2025 at 6:01 PM Stepan Neretin <slpmcf@gmail.com> wrote:
On Thu, May 8, 2025 at 6:57 AM Matthew Sterrett <matthewsterrett2@gmail.com> wrote:
Hello! I'm still trying to figure out those CI failures; I just wanted
to update things.

From my testing, with this patch, repeatedly disabling/enabling
checksums is about 12.4% faster on an approximately 15 GB database.

By the way, I'd love it if anyone could help me figure out how to
replicate a CI failure in the Cirrus CI.
I haven't been able to figure out how to test CI runs locally; does
anyone know a good method to do that?

Hi Matthew,
Thanks for the patch!
I ran some timing tests:
(without avx2)
Time: 4034.351 ms
SELECT drive_pg_checksum(512);

(with avx2)
Time: 3559.076 ms
SELECT drive_pg_checksum(512);

Also attached two patches that should fix the CI issues.
Best,
Stepan Neretin
Oops, forgot to attach patches :)
Best,
Stepan Neretin
Attachments:
v3-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchapplication/octet-stream; name=v3-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchDownload
From 30d916546860d3d19f8d91a0ab4bc99f9c350aaa Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 16:37:13 +0700
Subject: [PATCH v3 2/5] Fix compilation on systems where <immintrin.h> is not
available or inappropriate, such as older GCC versions or non-x86 platforms.
---
src/include/storage/checksum_impl.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 5ea1f698b57..042ee8af120 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -112,7 +112,9 @@
#include <cpuid.h>
#endif
+#ifdef HAVE_XSAVE_INTRINSICS
#include <immintrin.h>
+#endif
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
#include <intrin.h>
@@ -253,10 +255,6 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
}
/* Declarations are always defined to make dynamic dispatch code simpler */
-
-PG_DECLARE_CHECKSUM_ISA(default);
-PG_DECLARE_CHECKSUM_ISA(avx2);
-
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
--
2.43.0
v3-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patchapplication/octet-stream; name=v3-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patchDownload
From f7b28c6378db9cc98371e89e830a2ae8a571e9a0 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Mon, 19 May 2025 13:23:55 -0700
Subject: [PATCH v3 5/5] Use dummy function to avoid linker error, move
declarations
---
src/include/storage/checksum_impl.h | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 042ee8af120..4070646e23e 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -163,7 +163,7 @@ ymm_regs_available(void)
static inline bool
avx2_available(void)
{
-#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+#if defined (USE_AVX2_WITH_RUNTIME_CHECK) && defined(__x86_64__)
unsigned int exx[4] = {0, 0, 0, 0};
if (!xsave_available() || !ymm_regs_available()) return false;
@@ -220,7 +220,20 @@ do { \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+#define PG_DEFINE_CHECKSUM_DUMMY(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ Assert(false); /* This function should never be called */ \
+ return pg_checksum_block_default(page); /* Just in case it somehow is */ \
+}
+
#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
pg_attribute_target(#ISANAME) \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
@@ -254,10 +267,11 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
return result; \
}
-/* Declarations are always defined to make dynamic dispatch code simpler */
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
+#else
+PG_DEFINE_CHECKSUM_DUMMY(avx2);
#endif
static uint32
--
2.43.0
v3-0004-fix-bench-compiling.patchapplication/octet-stream; name=v3-0004-fix-bench-compiling.patchDownload
From 78873aec488678accbed1c4d1fc7ce15d12df303 Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 17:57:47 +0700
Subject: [PATCH v3 4/5] fix bench compiling
---
contrib/pg_checksum_bench/pg_checksum_bench.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
index f40f335ff59..0296dcfd259 100644
--- a/contrib/pg_checksum_bench/pg_checksum_bench.c
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -1,5 +1,6 @@
#include "postgres.h"
#include "fmgr.h"
+#include "storage/checksum.h"
#include "storage/checksum_impl.h"
#include <stdio.h>
--
2.43.0
v3-0003-Benchmark-code-for-postgres-checksums.patchapplication/octet-stream; name=v3-0003-Benchmark-code-for-postgres-checksums.patchDownload
From cd6dc517b897faf3d960fafe8ea6b2cf9bbc2e05 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:27 -0800
Subject: [PATCH v3 3/5] Benchmark code for postgres checksums
---
contrib/meson.build | 1 +
contrib/pg_checksum_bench/meson.build | 23 +++++++++++++
.../pg_checksum_bench--1.0.sql | 8 +++++
contrib/pg_checksum_bench/pg_checksum_bench.c | 34 +++++++++++++++++++
.../pg_checksum_bench.control | 4 +++
.../sql/pg_checksum_bench.sql | 17 ++++++++++
6 files changed, 87 insertions(+)
create mode 100644 contrib/pg_checksum_bench/meson.build
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d639..fe5149aadff 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('pg_checksum_bench')
subdir('amcheck')
subdir('auth_delay')
subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 00000000000..32ccd9efa0f
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+ 'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+ pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_checksum_bench',
+ '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+ pg_checksum_bench_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+ 'pg_checksum_bench--1.0.sql',
+ 'pg_checksum_bench.control',
+ kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 00000000000..5f13cbe3c5e
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+ RETURNS pg_catalog.void
+ AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 00000000000..f40f335ff59
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,34 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "storage/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+ int page_count = PG_GETARG_INT32(0);
+
+ PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage));
+ srand(0);
+ for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){
+ char * byte_ptr = (char *) pages;
+ byte_ptr[i] = rand() % 256;
+ }
+
+ for (int i = 0; i < REPEATS; i++){
+ const PGChecksummablePage * test_page = pages + (i % page_count);
+ volatile uint32 result = pg_checksum_block(test_page);
+ (void) result;
+ }
+
+ pfree((void *) pages);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 00000000000..4a4e2c9363c
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 00000000000..4b347699953
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
--
2.43.0
v3-0001-Enable-autovectorizing-pg_checksum_block.patchapplication/octet-stream; name=v3-0001-Enable-autovectorizing-pg_checksum_block.patchDownload
From 58122ebe1bbebc70da90c7ec3e77f3c3d524aa85 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:45 -0800
Subject: [PATCH v3 1/5] Enable autovectorizing pg_checksum_block
---
config/c-compiler.m4 | 31 ++++++
configure | 52 ++++++++++
configure.ac | 9 ++
meson.build | 28 ++++++
src/include/pg_config.h.in | 3 +
src/include/storage/checksum_impl.h | 150 +++++++++++++++++++++++-----
6 files changed, 250 insertions(+), 23 deletions(-)
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5f3e1d1faf9..4d5bceafe9e 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -710,6 +710,37 @@ fi
undefine([Ac_cachevar])dnl
])# PGAC_XSAVE_INTRINSICS
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf@<:@sizeof(__m256i)@:>@;
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }],
+ [return avx2_test();])],
+ [Ac_cachevar=yes],
+ [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
# PGAC_AVX512_POPCNT_INTRINSICS
# -----------------------------
# Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index 4f15347cc95..0b328dc22db 100755
--- a/configure
+++ b/configure
@@ -17724,6 +17724,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }
+int
+main ()
+{
+return avx2_test();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pgac_cv_avx2_support=yes
+else
+ pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+
+ if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index 4b8335dc613..b980429282f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2089,6 +2089,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ PGAC_AVX2_SUPPORT()
+ if test x"$pgac_avx2_support" = x"yes"; then
+ AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index d142e3e408b..e0ab1f9cf2f 100644
--- a/meson.build
+++ b/meson.build
@@ -2301,6 +2301,34 @@ int main(void)
endif
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+ prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+}
+'''
+
+ if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+ cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+ endif
+
+endif
+
###############################################################
# Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..34fa398ee8c 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
/* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
#undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index da87d61ba52..5ea1f698b57 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -101,12 +101,83 @@
*/
#include "storage/bufpage.h"
+#include "pg_config.h"
/* number of checksums to calculate in parallel */
#define N_SUMS 32
/* prime multiplier of FNV-1a hash */
#define FNV_PRIME 16777619
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0x06) == 0x06;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-2
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+ unsigned int exx[4] = {0, 0, 0, 0};
+ if (!xsave_available() || !ymm_regs_available()) return false;
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[1] & (1 << 5)) != 0; /* avx2 */
+#else
+ return false;
+#endif
+}
+
/* Use a union so that this code is valid under strict aliasing */
typedef union
{
@@ -142,35 +213,68 @@ do { \
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
*/
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
-{
- uint32 sums[N_SUMS];
- uint32 result = 0;
- uint32 i,
- j;
+
+#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+
+#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ uint32 sums[N_SUMS]; \
+ uint32 result = 0; \
+ uint32 i, \
+ j; \
+ \
+ /* ensure that the size is compatible with the algorithm */ \
+ Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
+ \
+ /* initialize partial checksums to their corresponding offsets */ \
+ memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
+ \
+ /* main checksum calculation */ \
+ /* this is the main place that autovectorization occurs */ \
+ for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], page->data[i][j]); \
+ \
+ /* finally add in two rounds of zeroes for additional mixing */ \
+ for (i = 0; i < 2; i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], 0); \
+ \
+ /* xor fold partial checksums together */ \
+ for (i = 0; i < N_SUMS; i++) \
+ result ^= sums[i]; \
+ \
+ return result; \
+}
- /* ensure that the size is compatible with the algorithm */
- Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+/* Declarations are always defined to make dynamic dispatch code simpler */
- /* initialize partial checksums to their corresponding offsets */
- memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+PG_DECLARE_CHECKSUM_ISA(default);
+PG_DECLARE_CHECKSUM_ISA(avx2);
- /* main checksum calculation */
- for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], page->data[i][j]);
+PG_DEFINE_CHECKSUM_ISA(default);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+PG_DEFINE_CHECKSUM_ISA(avx2);
+#endif
- /* finally add in two rounds of zeroes for additional mixing */
- for (i = 0; i < 2; i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], 0);
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page);
- /* xor fold partial checksums together */
- for (i = 0; i < N_SUMS; i++)
- result ^= sums[i];
+static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
- return result;
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page){
+ if (avx2_available()){
+ pg_checksum_block = pg_checksum_block_avx2;
+ }else{
+ pg_checksum_block = pg_checksum_block_default;
+ }
+ return pg_checksum_block(page);
}
/*
--
2.43.0
Hi,
On Tue, 20 May 2025 at 02:54, Matthew Sterrett
<matthewsterrett2@gmail.com> wrote:
Hello! Thanks for helping me with this.
I'm still trying to figure out what is going on with the Bookworm test
failures. I'm pretty sure this patchset should resolve all the issues
with the macOS build, but I don't think it will help the linux
failures unfortunately.
You can see the failure at the artifacts ->
'log/tmp_install/log/install.log' file on the CI web page [1].
If you want to replicate that on your local:
$ ./configure --with-llvm CLANG="ccache clang-16"
$ make -s -j8 world-bin
$ make -j8 check-world
should be enough. I was able to replicate it with these commands. I
hope these help.
[1]: https://cirrus-ci.com/task/4834162550505472
--
Regards,
Nazir Bilal Yavuz
Microsoft
You can see the failure at the artifacts ->
'log/tmp_install/log/install.log' file on the CI web page [1].

If you want to replicate that on your local:
$ ./configure --with-llvm CLANG="ccache clang-16"
$ make -s -j8 world-bin
$ make -j8 check-world

should be enough. I was able to replicate it with these commands. I
hope these help.
Thanks so much for helping me figure this out!
Okay, I've determined that versions of LLVM/Clang before 19 crash when
compiling this patch for some reason; it seems that both make
check-world and make install will crash with the affected LLVM
versions.
Unfortunately, what matters seems to be the version of the linker/LTO
optimizer, which I don't think we can check at compile time.
I added a check for Clang >= 19, which prevents the crash on my system.
I think it's possible some unusual combination of Clang/LLVM might
still crash during the build, but I think this is a reasonable
solution.
Attachments:
v4-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patchapplication/octet-stream; name=v4-0005-Use-dummy-function-to-avoid-linker-error-move-dec.patchDownload
From f7b28c6378db9cc98371e89e830a2ae8a571e9a0 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Mon, 19 May 2025 13:23:55 -0700
Subject: [PATCH v4 5/6] Use dummy function to avoid linker error, move
declarations
---
src/include/storage/checksum_impl.h | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 042ee8af120..4070646e23e 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -163,7 +163,7 @@ ymm_regs_available(void)
static inline bool
avx2_available(void)
{
-#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+#if defined (USE_AVX2_WITH_RUNTIME_CHECK) && defined(__x86_64__)
unsigned int exx[4] = {0, 0, 0, 0};
if (!xsave_available() || !ymm_regs_available()) return false;
@@ -220,7 +220,20 @@ do { \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+#define PG_DEFINE_CHECKSUM_DUMMY(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ Assert(false); /* This function should never be called */ \
+ return pg_checksum_block_default(page); /* Just in case it somehow is */ \
+}
+
#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
pg_attribute_target(#ISANAME) \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
@@ -254,10 +267,11 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
return result; \
}
-/* Declarations are always defined to make dynamic dispatch code simpler */
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
+#else
+PG_DEFINE_CHECKSUM_DUMMY(avx2);
#endif
static uint32
--
2.43.0
v4-0004-fix-bench-compiling.patchapplication/octet-stream; name=v4-0004-fix-bench-compiling.patchDownload
From 78873aec488678accbed1c4d1fc7ce15d12df303 Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 17:57:47 +0700
Subject: [PATCH v4 4/6] fix bench compiling
---
contrib/pg_checksum_bench/pg_checksum_bench.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
index f40f335ff59..0296dcfd259 100644
--- a/contrib/pg_checksum_bench/pg_checksum_bench.c
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -1,5 +1,6 @@
#include "postgres.h"
#include "fmgr.h"
+#include "storage/checksum.h"
#include "storage/checksum_impl.h"
#include <stdio.h>
--
2.43.0
v4-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchapplication/octet-stream; name=v4-0002-Fix-compilation-on-systems-where-immintrin.h-is-n.patchDownload
From 30d916546860d3d19f8d91a0ab4bc99f9c350aaa Mon Sep 17 00:00:00 2001
From: Stepan Neretin <slpmcf@gmail.com>
Date: Sat, 10 May 2025 16:37:13 +0700
Subject: [PATCH v4 2/6] Fix compilation on systems where <immintrin.h> is not
available or inappropriate, such as older GCC versions or non-x86 platforms.
---
src/include/storage/checksum_impl.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 5ea1f698b57..042ee8af120 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -112,7 +112,9 @@
#include <cpuid.h>
#endif
+#ifdef HAVE_XSAVE_INTRINSICS
#include <immintrin.h>
+#endif
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
#include <intrin.h>
@@ -253,10 +255,6 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
}
/* Declarations are always defined to make dynamic dispatch code simpler */
-
-PG_DECLARE_CHECKSUM_ISA(default);
-PG_DECLARE_CHECKSUM_ISA(avx2);
-
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
--
2.43.0
v4-0001-Enable-autovectorizing-pg_checksum_block.patchapplication/octet-stream; name=v4-0001-Enable-autovectorizing-pg_checksum_block.patchDownload
From 58122ebe1bbebc70da90c7ec3e77f3c3d524aa85 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:45 -0800
Subject: [PATCH v4 1/6] Enable autovectorizing pg_checksum_block
---
config/c-compiler.m4 | 31 ++++++
configure | 52 ++++++++++
configure.ac | 9 ++
meson.build | 28 ++++++
src/include/pg_config.h.in | 3 +
src/include/storage/checksum_impl.h | 150 +++++++++++++++++++++++-----
6 files changed, 250 insertions(+), 23 deletions(-)
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5f3e1d1faf9..4d5bceafe9e 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -710,6 +710,37 @@ fi
undefine([Ac_cachevar])dnl
])# PGAC_XSAVE_INTRINSICS
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf@<:@sizeof(__m256i)@:>@;
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }],
+ [return avx2_test();])],
+ [Ac_cachevar=yes],
+ [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
# PGAC_AVX512_POPCNT_INTRINSICS
# -----------------------------
# Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index 4f15347cc95..0b328dc22db 100755
--- a/configure
+++ b/configure
@@ -17724,6 +17724,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <immintrin.h>
+ #include <stdint.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx2")))
+ #endif
+ static int avx2_test(void)
+ {
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+ }
+int
+main ()
+{
+return avx2_test();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pgac_cv_avx2_support=yes
+else
+ pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+ pgac_avx2_support=yes
+fi
+
+ if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index 4b8335dc613..b980429282f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2089,6 +2089,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+ PGAC_AVX2_SUPPORT()
+ if test x"$pgac_avx2_support" = x"yes"; then
+ AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+ fi
+fi
+
# Check for AVX-512 popcount intrinsics
#
if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index d142e3e408b..e0ab1f9cf2f 100644
--- a/meson.build
+++ b/meson.build
@@ -2301,6 +2301,34 @@ int main(void)
endif
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+ prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+ const char buf[sizeof(__m256i)];
+ __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+ accum = _mm256_add_epi32(accum, accum);
+ int result = _mm256_extract_epi32(accum, 0);
+ return (int) result;
+}
+'''
+
+ if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+ cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+ endif
+
+endif
+
###############################################################
# Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..34fa398ee8c 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -672,6 +672,9 @@
/* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
#undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
/* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
#undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index da87d61ba52..5ea1f698b57 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -101,12 +101,83 @@
*/
#include "storage/bufpage.h"
+#include "pg_config.h"
/* number of checksums to calculate in parallel */
#define N_SUMS 32
/* prime multiplier of FNV-1a hash */
#define FNV_PRIME 16777619
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the YMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+ymm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0x06) == 0x06;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX2?
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+ unsigned int exx[4] = {0, 0, 0, 0};
+ if (!xsave_available() || !ymm_regs_available()) return false;
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[1] & (1 << 5)) != 0; /* avx2 */
+#else
+ return false;
+#endif
+}
+
/* Use a union so that this code is valid under strict aliasing */
typedef union
{
@@ -142,35 +213,68 @@ do { \
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
*/
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
-{
- uint32 sums[N_SUMS];
- uint32 result = 0;
- uint32 i,
- j;
+
+#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
+
+#define PG_DEFINE_CHECKSUM_ISA(ISANAME) \
+pg_attribute_target(#ISANAME) \
+static uint32 \
+pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
+{ \
+ uint32 sums[N_SUMS]; \
+ uint32 result = 0; \
+ uint32 i, \
+ j; \
+ \
+ /* ensure that the size is compatible with the algorithm */ \
+ Assert(sizeof(PGChecksummablePage) == BLCKSZ); \
+ \
+ /* initialize partial checksums to their corresponding offsets */ \
+ memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); \
+ \
+ /* main checksum calculation */ \
+ /* this is the main place that autovectorization occurs */ \
+ for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], page->data[i][j]); \
+ \
+ /* finally add in two rounds of zeroes for additional mixing */ \
+ for (i = 0; i < 2; i++) \
+ for (j = 0; j < N_SUMS; j++) \
+ CHECKSUM_COMP(sums[j], 0); \
+ \
+ /* xor fold partial checksums together */ \
+ for (i = 0; i < N_SUMS; i++) \
+ result ^= sums[i]; \
+ \
+ return result; \
+}
- /* ensure that the size is compatible with the algorithm */
- Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+/* Declarations are always defined to make dynamic dispatch code simpler */
- /* initialize partial checksums to their corresponding offsets */
- memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+PG_DECLARE_CHECKSUM_ISA(default);
+PG_DECLARE_CHECKSUM_ISA(avx2);
- /* main checksum calculation */
- for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], page->data[i][j]);
+PG_DEFINE_CHECKSUM_ISA(default);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+PG_DEFINE_CHECKSUM_ISA(avx2);
+#endif
- /* finally add in two rounds of zeroes for additional mixing */
- for (i = 0; i < 2; i++)
- for (j = 0; j < N_SUMS; j++)
- CHECKSUM_COMP(sums[j], 0);
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page);
- /* xor fold partial checksums together */
- for (i = 0; i < N_SUMS; i++)
- result ^= sums[i];
+static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_dispatch;
- return result;
+static uint32
+pg_checksum_block_dispatch(const PGChecksummablePage *page){
+ if (avx2_available()){
+ pg_checksum_block = pg_checksum_block_avx2;
+ }else{
+ pg_checksum_block = pg_checksum_block_default;
+ }
+ return pg_checksum_block(page);
}
/*
--
2.43.0
v4-0003-Benchmark-code-for-postgres-checksums.patchapplication/octet-stream; name=v4-0003-Benchmark-code-for-postgres-checksums.patchDownload
From cd6dc517b897faf3d960fafe8ea6b2cf9bbc2e05 Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Fri, 7 Mar 2025 11:33:27 -0800
Subject: [PATCH v4 3/6] Benchmark code for postgres checksums
---
contrib/meson.build | 1 +
contrib/pg_checksum_bench/meson.build | 23 +++++++++++++
.../pg_checksum_bench--1.0.sql | 8 +++++
contrib/pg_checksum_bench/pg_checksum_bench.c | 34 +++++++++++++++++++
.../pg_checksum_bench.control | 4 +++
.../sql/pg_checksum_bench.sql | 17 ++++++++++
6 files changed, 87 insertions(+)
create mode 100644 contrib/pg_checksum_bench/meson.build
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.c
create mode 100644 contrib/pg_checksum_bench/pg_checksum_bench.control
create mode 100644 contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d639..fe5149aadff 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -12,6 +12,7 @@ contrib_doc_args = {
'install_dir': contrib_doc_dir,
}
+subdir('pg_checksum_bench')
subdir('amcheck')
subdir('auth_delay')
subdir('auto_explain')
diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build
new file mode 100644
index 00000000000..32ccd9efa0f
--- /dev/null
+++ b/contrib/pg_checksum_bench/meson.build
@@ -0,0 +1,23 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+pg_checksum_bench_sources = files(
+ 'pg_checksum_bench.c',
+)
+
+if host_system == 'windows'
+ pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_checksum_bench',
+ '--FILEDESC', 'pg_checksum_bench',])
+endif
+
+pg_checksum_bench = shared_module('pg_checksum_bench',
+ pg_checksum_bench_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_checksum_bench
+
+install_data(
+ 'pg_checksum_bench--1.0.sql',
+ 'pg_checksum_bench.control',
+ kwargs: contrib_data_args,
+)
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
new file mode 100644
index 00000000000..5f13cbe3c5e
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit
+
+CREATE FUNCTION drive_pg_checksum(page_count int)
+ RETURNS pg_catalog.void
+ AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c
new file mode 100644
index 00000000000..f40f335ff59
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.c
@@ -0,0 +1,34 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "storage/checksum_impl.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+PG_MODULE_MAGIC;
+
+#define REPEATS 1000000
+
+PG_FUNCTION_INFO_V1(drive_pg_checksum);
+Datum
+drive_pg_checksum(PG_FUNCTION_ARGS)
+{
+ int page_count = PG_GETARG_INT32(0);
+
+ PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage));
+ srand(0);
+ for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){
+ char * byte_ptr = (char *) pages;
+ byte_ptr[i] = rand() % 256;
+ }
+
+ for (int i = 0; i < REPEATS; i++){
+ const PGChecksummablePage * test_page = pages + (i % page_count);
+ volatile uint32 result = pg_checksum_block(test_page);
+ (void) result;
+ }
+
+ pfree((void *) pages);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control
new file mode 100644
index 00000000000..4a4e2c9363c
--- /dev/null
+++ b/contrib/pg_checksum_bench/pg_checksum_bench.control
@@ -0,0 +1,4 @@
+comment = 'pg_checksum benchmark'
+default_version = '1.0'
+module_pathname = '$libdir/pg_checksum_bench'
+relocatable = true
diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
new file mode 100644
index 00000000000..4b347699953
--- /dev/null
+++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql
@@ -0,0 +1,17 @@
+CREATE EXTENSION pg_checksum_bench;
+
+SELECT drive_pg_checksum(-1);
+
+\timing on
+
+SELECT drive_pg_checksum(1);
+SELECT drive_pg_checksum(2);
+SELECT drive_pg_checksum(4);
+SELECT drive_pg_checksum(8);
+SELECT drive_pg_checksum(16);
+SELECT drive_pg_checksum(32);
+SELECT drive_pg_checksum(64);
+SELECT drive_pg_checksum(128);
+SELECT drive_pg_checksum(256);
+SELECT drive_pg_checksum(512);
+SELECT drive_pg_checksum(1024);
--
2.43.0
v4-0006-Workaround-for-clang-19-crash.patchapplication/octet-stream; name=v4-0006-Workaround-for-clang-19-crash.patchDownload
From 5fbb374a397d22febcee97b08564967afaacf37e Mon Sep 17 00:00:00 2001
From: Matthew Sterrett <matthew.sterrett@intel.com>
Date: Wed, 21 May 2025 16:00:22 -0700
Subject: [PATCH v4 6/6] Workaround for clang<19 crash
---
src/include/storage/checksum_impl.h | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h
index 4070646e23e..81dfecff17d 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/storage/checksum_impl.h
@@ -132,7 +132,7 @@ xsave_available(void)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
__cpuid(exx, 1);
-#else
+#elif defined(__x86_64__)
#error cpuid instruction not available
#endif
return (exx[2] & (1 << 27)) != 0; /* osxsave */
@@ -215,15 +215,10 @@ do { \
* Block checksum algorithm. The page must be adequately aligned
* (at least on 4-byte boundary).
*/
-
-#define PG_DECLARE_CHECKSUM_ISA(ISANAME) \
-static uint32 \
-pg_checksum_block_##ISANAME(const PGChecksummablePage *page);
#define PG_DEFINE_CHECKSUM_DUMMY(ISANAME) \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page); \
-pg_attribute_target(#ISANAME) \
static uint32 \
pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
{ \
@@ -267,6 +262,9 @@ pg_checksum_block_##ISANAME(const PGChecksummablePage *page) \
return result; \
}
+/* Older Clang versions crash during LTO with this code */
+#if !(__clang_major__) || __clang_major__ >= 19
+
PG_DEFINE_CHECKSUM_ISA(default);
#ifdef USE_AVX2_WITH_RUNTIME_CHECK
PG_DEFINE_CHECKSUM_ISA(avx2);
@@ -289,6 +287,15 @@ pg_checksum_block_dispatch(const PGChecksummablePage *page){
return pg_checksum_block(page);
}
+#else
+/* The same as before the patch, for the crashing Clang versions */
+PG_DEFINE_CHECKSUM_ISA(default);
+static uint32 pg_checksum_block(const PGChecksummablePage *page) {
+ return pg_checksum_block_default(page);
+}
+
+#endif
+
/*
* Compute the checksum for a Postgres page.
*
--
2.43.0
On Fri, May 23, 2025 at 4:54 AM Matthew Sterrett
<matthewsterrett2@gmail.com> wrote:
> Okay, I've determined that versions of LLVM/Clang before 19 crash when
> compiling this patch for some reason; it seems that both make
> check-world and make install will crash with the affected LLVM
> versions.
> Unfortunately, what matters seems to be the version of the linker/LTO
> optimizer, which I don't think we can check at compile time.
> I added a check for Clang>=19 which works at preventing the crash on my system.
> I think it's possible some unusual combination of clang/LLVM might
> still crash during the build, but I think this is a reasonable
> solution.
I don't know if this is related to the crashes, but it doesn't seem
like a good idea to #include the function pointer stuff everywhere;
that should probably go into src/port like the others.
--
John Naylor
Amazon Web Services