From 82c9535e6b5b851020a1260bea05cbd5c104b0a5 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Fri, 8 Nov 2024 14:19:59 +0900
Subject: [PATCH v9] Optimize pg_memory_is_all_zeros()

pg_memory_is_all_zeros() is currently doing byte per byte comparison and so
could lead to performance regression or penalties when multi bytes comparison
could be done instead.

Let's provide an optimized version that divides the checks into four phases for
efficiency:

- Initial alignment (byte per byte comparison)
- Compare 8 size_t chunks at once using bitwise OR (candidate for SIMD optimization)
- Compare remaining size_t aligned chunks
- Compare remaining bytes (byte per byte comparison)

Code mainly suggested by David Rowley.

In passing, let's make use of this new version in PageIsVerifiedExtended() now
that multi bytes comparison is handled.
---
 src/include/utils/memutils.h       | 57 ++++++++++++++++++++++++++++--
 src/backend/storage/page/bufpage.c | 13 +------
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 3590c8bad9..e47747c38b 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -190,19 +190,70 @@ extern MemoryContext BumpContextCreate(MemoryContext parent,
 #define SLAB_LARGE_BLOCK_SIZE		(8 * 1024 * 1024)
 
 /*
+ * pg_memory_is_all_zeros
+ *
  * Test if a memory region starting at "ptr" and of size "len" is full of
  * zeroes.
+ *
+ * The test is divided into multiple phases, to be efficient for various
+ * length values:
+ * - Byte comparison, until the pointer is aligned.
+ * - 8 * sizeof(size_t) comparisons using bitwise OR, to encourage compilers
+ *   to use SIMD intrinsics if available, up to the last aligned location
+ *   possible.
+ * - size_t comparisons, with aligned pointers, up to the last location
+ *   possible.
+ * - Byte comparison, until the end location.
+ *
+ * Caller must ensure that "ptr" is not NULL.
  */
 static inline bool
 pg_memory_is_all_zeros(const void *ptr, size_t len)
 {
-	const char *p = (const char *) ptr;
+	const unsigned char *p = (const unsigned char *) ptr;
+	const unsigned char *end = &p[len];
+	const unsigned char *aligned_end = (const unsigned char *)
+		((uintptr_t) end & (~(sizeof(size_t) - 1)));
 
-	for (size_t i = 0; i < len; i++)
+	/* Compare bytes until the pointer "p" is aligned */
+	while (((uintptr_t) p & (sizeof(size_t) - 1)) != 0)
 	{
-		if (p[i] != 0)
+		if (p == end)
+			return true;
+
+		if (*p++ != 0)
 			return false;
 	}
+
+	/*
+	 * Compare 8 * sizeof(size_t) chunks at once.
+	 *
+	 * All comparisons are combined with a single OR operation, making it a
+	 * good candidate for SIMD intrinsics, if available.
+	 */
+	for (; p < aligned_end - (sizeof(size_t) * 7); p += sizeof(size_t) * 8)
+	{
+		if ((((size_t *) p)[0] != 0) | (((size_t *) p)[1] != 0) |
+			(((size_t *) p)[2] != 0) | (((size_t *) p)[3] != 0) |
+			(((size_t *) p)[4] != 0) | (((size_t *) p)[5] != 0) |
+			(((size_t *) p)[6] != 0) | (((size_t *) p)[7] != 0))
+			return false;
+	}
+
+	/* Compare remaining size_t-aligned chunks */
+	for (; p < aligned_end; p += sizeof(size_t))
+	{
+		if (*(size_t *) p != 0)
+			return false;
+	}
+
+	/* Compare remaining bytes until the end */
+	while (p < end)
+	{
+		if (*p++ != 0)
+			return false;
+	}
+
 	return true;
 }
 
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index be6f1f62d2..aa264f61b9 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -89,10 +89,8 @@ PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
 {
 	PageHeader	p = (PageHeader) page;
 	size_t	   *pagebytes;
-	int			i;
 	bool		checksum_failure = false;
 	bool		header_sane = false;
-	bool		all_zeroes = false;
 	uint16		checksum = 0;
 
 	/*
@@ -126,18 +124,9 @@ PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
 	}
 
 	/* Check all-zeroes case */
-	all_zeroes = true;
 	pagebytes = (size_t *) page;
-	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
-	{
-		if (pagebytes[i] != 0)
-		{
-			all_zeroes = false;
-			break;
-		}
-	}
 
-	if (all_zeroes)
+	if (pg_memory_is_all_zeros(pagebytes, BLCKSZ))
 		return true;
 
 	/*
-- 
2.45.2

