From d816f92e1c290c771bfd323ffba6f33319a4eb72 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Wed, 12 Nov 2025 14:31:24 +0700
Subject: [PATCH v4 4/4] Detect common prefix to avoid wasted work during radix
 sort

This is particularly useful for integers, since they commonly
have some zero upper bytes.
---
 src/backend/utils/sort/tuplesort.c | 53 +++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 3efc22de24e..68fa9c13ca8 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -104,6 +104,7 @@
 #include "commands/tablespace.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
+#include "port/pg_bitutils.h"
 #include "storage/shmem.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -2980,9 +2981,59 @@ sort_byvalue_datum(Tuplesortstate *state)
 		}
 		else
 		{
+			int			common_prefix;
+			Datum		first_datum = 0;
+			Datum		common_upper_bits = 0;
+
+			/*
+			 * Compute the common prefix to skip unproductive recursion steps
+			 * during radix sort.
+			 */
+			for (SortTuple *tup = not_null_start;
+				 tup < not_null_start + not_null_count;
+				 tup++)
+			{
+				Datum		this_datum = tup->datum1;
+
+				if (tup == not_null_start)
+				{
+					/*
+					 * Need to start with some value, may as well be the first
+					 * one.
+					 */
+					first_datum = this_datum;
+				}
+				else
+				{
+					Datum		this_common_bits;
+
+					/* The bits in common will be zero */
+					this_common_bits = first_datum ^ this_datum;
+
+					/*
+					 * We're really only interested in the case where the
+					 * leftmost one bit is further left, but this branch
+					 * should be predictable enough not to waste cycles trying
+					 * harder.
+					 */
+					if (this_common_bits > common_upper_bits)
+						common_upper_bits = this_common_bits;
+				}
+			}
+
+			/*
+			 * The upper bits of common_upper_bits are zero where all values
+			 * have the same bits. The byte position of the leftmost one bit
+			 * is the byte where radix sort should start bucketing. OR-ing in
+			 * the lowest bit guards against undefined behavior without
+			 * changing the result.
+			 */
+			common_prefix = sizeof(Datum) - 1 -
+				(pg_leftmost_one_pos64(common_upper_bits | 1) / BITS_PER_BYTE);
+
 			radix_sort_tuple(not_null_start,
 							 not_null_count,
-							 0,
+							 common_prefix,
 							 state);
 		}
 	}
-- 
2.51.1

