From 561893beb4e3e008196b3e571685503e25a243f1 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Tue, 8 Aug 2023 12:58:07 +0700
Subject: [PATCH v4 2/2] Some minor adjustments to be squashed

---
 config/c-compiler.m4           | 4 ++++
 configure                      | 7 +++++--
 configure.ac                   | 7 +++++--
 src/port/pg_crc32c_loongarch.c | 6 +++---
 4 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 7777ad6e90..bd3e6d6623 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -669,6 +669,10 @@ undefine([Ac_cachevar])dnl
 # __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w
 # intrinsic functions.
 #
+# We test for the 8-byte variant since platforms capable of running
+# Postgres are 64-bit only (as of PG17), so we know CRC instructions
+# are available without a runtime check.
+#
 # If the intrinsics are supported, sets pgac_loongarch_crc32c_intrinsics.
 AC_DEFUN([PGAC_LOONGARCH_CRC32C_INTRINSICS],
 [define([Ac_cachevar], [AS_TR_SH([pgac_cv_loongarch_crc32c_intrinsics])])dnl
diff --git a/configure b/configure
index fe0b02aa80..6a80e374f1 100755
--- a/configure
+++ b/configure
@@ -18119,8 +18119,11 @@ fi
 # we're not targeting such a processor, but can nevertheless produce code that
 # uses the CRC instructions, compile both, and select at runtime.
 #
-# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
+# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
+#
+# If we are targeting a LoongArch processor, CRC instructions are
+# always available (at least on 64-bit), so no runtime check is needed.
 if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then
   # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
@@ -18139,8 +18142,8 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" &&
         if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then
           USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1
         else
+          # LoongArch CRCC instructions.
           if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then
-            # LoongArch CRCC instructions.
             USE_LOONGARCH_CRC32C=1
           else
             # fall back to slicing-by-8 algorithm, which doesn't require any
diff --git a/configure.ac b/configure.ac
index 57f0f836c7..6105af6996 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2130,8 +2130,11 @@ AC_SUBST(CFLAGS_CRC)
 # we're not targeting such a processor, but can nevertheless produce code that
 # uses the CRC instructions, compile both, and select at runtime.
 #
-# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1
+# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1
 # in the template or configure command line.
+#
+# If we are targeting a LoongArch processor, CRC instructions are
+# always available (at least on 64-bit), so no runtime check is needed.
 if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then
   # Use Intel SSE 4.2 if available.
   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
@@ -2150,8 +2153,8 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" &&
         if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then
           USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1
         else
+          # LoongArch CRCC instructions.
           if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then
-            # LoongArch CRCC instructions.
             USE_LOONGARCH_CRC32C=1
           else
             # fall back to slicing-by-8 algorithm, which doesn't require any
diff --git a/src/port/pg_crc32c_loongarch.c b/src/port/pg_crc32c_loongarch.c
index 2897920800..db9da80e1b 100644
--- a/src/port/pg_crc32c_loongarch.c
+++ b/src/port/pg_crc32c_loongarch.c
@@ -23,9 +23,9 @@ pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len)
 	const unsigned char *pend = p + len;
 
 	/*
-	 * Loongarch desktop and server chips support unaligned memory access by default.
-	 * However, aligned memory access is significantly faster.
-	 * Process leading bytes so that the loop below starts with a pointer aligned to eight bytes.
+	 * LoongArch doesn't require alignment, but aligned memory access is
+	 * significantly faster. Process leading bytes so that the loop below
+	 * starts with a pointer aligned to eight bytes.
 	 */
 	if (!PointerIsAligned(p, uint16) &&
 		p + 1 <= pend)
-- 
2.41.0

