From 3817435d200af5da954d505ae66245662dea064c Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Fri, 15 Mar 2024 12:26:26 -0500
Subject: [PATCH v3 1/3] pg_lfind32: process "tail" with SIMD instructions

---
 src/include/port/pg_lfind.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
index b8dfa66eef..9d21284724 100644
--- a/src/include/port/pg_lfind.h
+++ b/src/include/port/pg_lfind.h
@@ -103,7 +103,7 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
 	const uint32 nelem_per_iteration = 4 * nelem_per_vector;
 
 	/* round down to multiple of elements per iteration */
-	const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
+	uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
 
 #if defined(USE_ASSERT_CHECKING)
 	bool		assert_result = false;
@@ -117,9 +117,11 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
 			break;
 		}
 	}
+	i = 0;
 #endif
 
-	for (i = 0; i < tail_idx; i += nelem_per_iteration)
+retry:
+	for (; i < tail_idx; i += nelem_per_iteration)
 	{
 		Vector32	vals1,
 					vals2,
@@ -157,6 +159,16 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
 			return true;
 		}
 	}
+
+	if (i == nelem)
+		return false;
+	else if (tail_idx > 0)
+	{
+		tail_idx = nelem;
+		i = nelem - nelem_per_iteration;
+		goto retry;
+	}
+
 #endif							/* ! USE_NO_SIMD */
 
 	/* Process the remaining elements one at a time. */
-- 
2.25.1

