From 1c1da11022c64ebb8bd1bec9bdca9783a78c94ee Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <pinaraf@pinaraf.info>
Date: Fri, 30 Jan 2026 10:35:43 +0100
Subject: [PATCH] llvmjit: reduce the number of jumps generated in O0

When using O0, LLVM doesn't try to reorder the basic blocks to
produce linear code in memory. LLVM also doesn't remove jumps even
when they target the instruction immediately following the current one.

Adding an optimizer step in O0 could end up having bad side effects,
so instead of asking LLVM to fix it, we can modify the IR code we
generate in order to get rid of as many jumps as possible.

- EEOP_QUAL was written following the C logic, thus:
	if null or value is false:
		jump to qualfail
	jump to next block
	qualfail:
	....

  By inverting the if, we have instead:
        if !null and value is not false:
		jump to next block
	....

  This is one less jump on amd64 with O0

- change the block creation order in tuple_deforming so that the
  outblock stays at the end of the function, removing a jump from the
  last attribute back to an outblock located earlier in the function
- don't create the adjust_unavail_cols block if not needed
- jump directly above the attisnull and the attcheckalign blocks if
  they are empty

All these together remove 7 jumps on a very basic query, and make the
generated assembly code far more natural and easier for the CPU to follow.
---
 src/backend/jit/llvm/llvmjit_deform.c | 62 +++++++++++++++++++++------
 src/backend/jit/llvm/llvmjit_expr.c   | 22 +++++-----
 2 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c
index 3eb087eb56b..9aeff1e4ff5 100644
--- a/src/backend/jit/llvm/llvmjit_deform.c
+++ b/src/backend/jit/llvm/llvmjit_deform.c
@@ -145,14 +145,8 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 
 	b_entry =
 		LLVMAppendBasicBlockInContext(lc, v_deform_fn, "entry");
-	b_adjust_unavail_cols =
-		LLVMAppendBasicBlockInContext(lc, v_deform_fn, "adjust_unavail_cols");
 	b_find_start =
 		LLVMAppendBasicBlockInContext(lc, v_deform_fn, "find_startblock");
-	b_out =
-		LLVMAppendBasicBlockInContext(lc, v_deform_fn, "outblock");
-	b_dead =
-		LLVMAppendBasicBlockInContext(lc, v_deform_fn, "deadblock");
 
 	b = LLVMCreateBuilderInContext(lc);
 
@@ -314,6 +308,10 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 			l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum);
 	}
 
+	/* create the exit and dead blocks at the end, so that even with O0 they will be at the end */
+	b_out = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "outblock");
+	b_dead = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "deadblock");
+
 	/*
 	 * Check if it is guaranteed that all the desired attributes are available
 	 * in the tuple (but still possibly NULL), by dint of either the last
@@ -325,8 +323,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 	if ((natts - 1) <= guaranteed_column_number)
 	{
 		/* just skip through unnecessary blocks */
-		LLVMBuildBr(b, b_adjust_unavail_cols);
-		LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
 		LLVMBuildBr(b, b_find_start);
 	}
 	else
@@ -334,6 +330,9 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 		LLVMValueRef v_params[3];
 		LLVMValueRef f;
 
+		/* create the block since it is now needed */
+		b_adjust_unavail_cols = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "adjust_unavail_cols");
+
 		/* branch if not all columns available */
 		LLVMBuildCondBr(b,
 						LLVMBuildICmp(b, LLVMIntULT,
@@ -399,6 +398,8 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 		LLVMValueRef l_attno = l_int16_const(lc, attnum);
 		LLVMValueRef v_attdatap;
 		LLVMValueRef v_resultp;
+		bool		delayed_jump_in_nonnullable;
+		bool		delayed_jump_in_attcheckno;
 
 		/* build block checking whether we did all the necessary attributes */
 		LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);
@@ -419,7 +420,7 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 		 */
 		if (attnum <= guaranteed_column_number)
 		{
-			LLVMBuildBr(b, attstartblocks[attnum]);
+			delayed_jump_in_attcheckno = true;
 		}
 		else
 		{
@@ -430,6 +431,7 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 									 v_maxatt,
 									 "heap_natts");
 			LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]);
+			delayed_jump_in_attcheckno = false;
 		}
 		LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);
 
@@ -484,13 +486,19 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 
 			LLVMBuildBr(b, b_next);
 			attguaranteedalign = false;
+			delayed_jump_in_nonnullable = false;
+			/* add the jump to our attisnull block in start */
+			if (delayed_jump_in_attcheckno)
+			{
+				LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);
+				LLVMBuildBr(b, attstartblocks[attnum]);
+				delayed_jump_in_attcheckno = false;
+			}
 		}
 		else
 		{
 			/* nothing to do */
-			LLVMBuildBr(b, attcheckalignblocks[attnum]);
-			LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]);
-			LLVMBuildBr(b, attcheckalignblocks[attnum]);
+			delayed_jump_in_nonnullable = true;
 		}
 		LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]);
 
@@ -574,14 +582,40 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 			}
 
 			LLVMBuildBr(b, attstoreblocks[attnum]);
-			LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]);
+			if (delayed_jump_in_nonnullable)
+			{
+				LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);
+				LLVMBuildBr(b, attcheckalignblocks[attnum]);
+				LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]);
+				LLVMBuildBr(b, attcheckalignblocks[attnum]);
+			}
+			if (delayed_jump_in_attcheckno)
+			{
+				LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);
+				LLVMBuildBr(b, attcheckalignblocks[attnum]);
+				delayed_jump_in_attcheckno = false;
+			}
 		}
 		else
 		{
 			LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]);
-			LLVMBuildBr(b, attalignblocks[attnum]);
+			LLVMBuildBr(b, attstoreblocks[attnum]);
 			LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]);
 			LLVMBuildBr(b, attstoreblocks[attnum]);
+			if (delayed_jump_in_nonnullable)
+			{
+				LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);
+				LLVMBuildBr(b, attstoreblocks[attnum]);
+				LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]);
+				LLVMBuildBr(b, attstoreblocks[attnum]);
+			}
+
+			if (delayed_jump_in_attcheckno)
+			{
+				LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);
+				LLVMBuildBr(b, attstoreblocks[attnum]);
+				delayed_jump_in_attcheckno = false;
+			}
 		}
 		LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]);
 
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 885b34c27e4..c942e6f4557 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -1224,7 +1224,7 @@ llvm_compile_expr(ExprState *state)
 				{
 					LLVMValueRef v_resnull;
 					LLVMValueRef v_resvalue;
-					LLVMValueRef v_nullorfalse;
+					LLVMValueRef v_notnullnorfalse;
 					LLVMBasicBlockRef b_qualfail;
 
 					b_qualfail = l_bb_before_v(opblocks[opno + 1],
@@ -1233,18 +1233,18 @@ llvm_compile_expr(ExprState *state)
 					v_resvalue = l_load(b, TypeDatum, v_resvaluep, "");
 					v_resnull = l_load(b, TypeStorageBool, v_resnullp, "");
 
-					v_nullorfalse =
-						LLVMBuildOr(b,
-									LLVMBuildICmp(b, LLVMIntEQ, v_resnull,
-												  l_sbool_const(1), ""),
-									LLVMBuildICmp(b, LLVMIntEQ, v_resvalue,
-												  l_datum_const(0), ""),
-									"");
+					v_notnullnorfalse =
+						LLVMBuildAnd(b,
+									 LLVMBuildICmp(b, LLVMIntNE, v_resnull,
+												   l_sbool_const(1), ""),
+									 LLVMBuildICmp(b, LLVMIntNE, v_resvalue,
+												   l_datum_const(0), ""),
+									 "");
 
 					LLVMBuildCondBr(b,
-									v_nullorfalse,
-									b_qualfail,
-									opblocks[opno + 1]);
+									v_notnullnorfalse,
+									opblocks[opno + 1],
+									b_qualfail);
 
 					/* build block handling NULL or false */
 					LLVMPositionBuilderAtEnd(b, b_qualfail);
-- 
2.43.0

