From ddc08b652cd95bdd2bd10c4fb3fca3d292e2c625 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sat, 7 Mar 2026 17:52:24 -0800 Subject: [PATCH v7 6/8] instrumentation: Optimize ExecProcNodeInstr instructions by inlining For most queries, the bulk of the overhead of EXPLAIN ANALYZE happens in ExecProcNodeInstr when starting/stopping instrumentation for that node. Previously each ExecProcNodeInstr would check which instrumentation options are active in the InstrStartNode/InstrStopNode calls, and do the corresponding work (timers, instrumentation stack, etc.). These conditionals being checked for each tuple being emitted add up, and cause non-optimal set of instructions to be generated by the compiler. Because we already have an existing mechanism to specify a function pointer when instrumentation is enabled, we can instead create specialized functions that are tailored to the instrumentation options enabled, and avoid conditionals on subsequent ExecProcNodeInstr calls. This results in the overhead for EXPLAIN (ANALYZE, TIMING OFF, BUFFERS OFF) for a stress test with a large COUNT(*) that does many ExecProcNode calls from ~ 20% on top of actual runtime to ~ 3%. When using BUFFERS ON the same query goes from ~ 20% to ~ 10% on top of actual runtime. Author: Lukas Fittl Reviewed-by: Discussion: --- src/backend/executor/execProcnode.c | 22 +-- src/backend/executor/instrument.c | 224 +++++++++++++++++++++------- src/include/executor/instrument.h | 5 + 3 files changed, 174 insertions(+), 77 deletions(-) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index c788b5b00f9..6a74ca516ae 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -120,7 +120,6 @@ #include "nodes/nodeFuncs.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); -static TupleTableSlot *ExecProcNodeInstr(PlanState *node); static bool ExecShutdownNode_walker(PlanState *node, void *context); static bool ExecRememberNodeInstrumentation_walker(PlanState *node, void *context); static bool ExecFinalizeNodeInstrumentation_walker(PlanState *node, void *context); @@ -464,7 +463,7 @@ ExecProcNodeFirst(PlanState *node) * have ExecProcNode() directly call the relevant function from now on. */ if (node->instrument) - node->ExecProcNode = ExecProcNodeInstr; + node->ExecProcNode = InstrNodeSetupExecProcNode(node->instrument); else node->ExecProcNode = node->ExecProcNodeReal; @@ -472,25 +471,6 @@ ExecProcNodeFirst(PlanState *node) } -/* - * ExecProcNode wrapper that performs instrumentation calls. By keeping - * this a separate function, we avoid overhead in the normal case where - * no instrumentation is wanted. - */ -static TupleTableSlot * -ExecProcNodeInstr(PlanState *node) -{ - TupleTableSlot *result; - - InstrStartNode(node->instrument); - - result = node->ExecProcNodeReal(node); - - InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); - - return result; -} - /* ---------------------------------------------------------------- * MultiExecProcNode diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 1afa5e94960..602facec401 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -49,29 +49,20 @@ InstrInitOptions(Instrumentation *instr, int instrument_options) instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; } -void -InstrStart(Instrumentation *instr) +static inline void +InstrStartTimer(Instrumentation *instr) { - if (instr->need_timer) - { - if (!INSTR_TIME_IS_ZERO(instr->starttime)) - elog(ERROR, "InstrStart called twice in a row"); - else - INSTR_TIME_SET_CURRENT(instr->starttime); - } + Assert(INSTR_TIME_IS_ZERO(instr->starttime)); - if (instr->need_bufusage || instr->need_walusage) - InstrPushStack(instr); + INSTR_TIME_SET_CURRENT(instr->starttime); } -static void +static inline void InstrStopTimer(Instrumentation *instr) { instr_time endtime; - /* let's update the time only if the timer was requested */ - if (INSTR_TIME_IS_ZERO(instr->starttime)) - elog(ERROR, "InstrStop called without start"); + Assert(!INSTR_TIME_IS_ZERO(instr->starttime)); INSTR_TIME_SET_CURRENT(endtime); INSTR_TIME_ACCUM_DIFF(instr->total, endtime, instr->starttime); @@ -79,6 +70,16 @@ InstrStopTimer(Instrumentation *instr) INSTR_TIME_SET_ZERO(instr->starttime); } +void +InstrStart(Instrumentation *instr) +{ + if (instr->need_timer) + InstrStartTimer(instr); + + if (instr->need_bufusage || instr->need_walusage) + InstrPushStack(instr); +} + void InstrStop(Instrumentation *instr) { @@ -333,65 +334,57 @@ InstrInitNode(NodeInstrumentation *instr, int instrument_options) InstrInitOptions(&instr->instr, instrument_options); } -/* Entry to a plan node */ +/* Entry to a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ void InstrStartNode(NodeInstrumentation *instr) { InstrStart(&instr->instr); } - -/* Exit from a plan node */ -void -InstrStopNode(NodeInstrumentation *instr, double nTuples) +/* + * Updates the node instrumentation time counter. + * + * Note this is different from InstrStop because total is only updated in + * InstrEndLoop. We need the separate counter variable because we need to + * calculate start-up time for the first tuple in each cycle, and then + * accumulate it together. + */ +static inline void +InstrStopNodeTimer(NodeInstrumentation *instr) { - double save_tuplecount = instr->tuplecount; instr_time endtime; - /* count the returned tuples */ - instr->tuplecount += nTuples; + Assert(!INSTR_TIME_IS_ZERO(instr->instr.starttime)); + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->instr.starttime); + INSTR_TIME_SET_ZERO(instr->instr.starttime); /* - * Update the time only if the timer was requested. + * Is this the first tuple of this cycle? * - * Note this is different from InstrStop because total is only updated in - * InstrEndLoop. We need the separate counter variable because we need to - * calculate start-up time for the first tuple in each cycle, and then - * accumulate it together. + * In async mode, if the plan node hadn't emitted any tuples before, this + * might be the first tuple */ - if (instr->instr.need_timer) - { - if (INSTR_TIME_IS_ZERO(instr->instr.starttime)) - elog(ERROR, "InstrStopNode called without start"); - - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->instr.starttime); + if (!instr->running || (instr->async_mode && instr->tuplecount < 1.0)) + instr->firsttuple = instr->counter; +} - INSTR_TIME_SET_ZERO(instr->instr.starttime); - } +/* Exit from a plan node. If you modify this, check InstrNodeSetupExecProcNode. */ +void +InstrStopNode(NodeInstrumentation *instr, double nTuples) +{ + if (instr->instr.need_timer) + InstrStopNodeTimer(instr); - /* - * Only pop the stack, accumulation runs in - * ExecFinalizeNodeInstrumentation - */ + /* Only pop the stack, accumulation runs in InstrFinalizeNode */ if (instr->instr.need_bufusage || instr->instr.need_walusage) InstrPopStack(&instr->instr); - /* Is this the first tuple of this cycle? */ - if (!instr->running) - { - instr->running = true; - instr->firsttuple = instr->counter; - } - else - { - /* - * In async mode, if the plan node hadn't emitted any tuples before, - * this might be the first tuple - */ - if (instr->async_mode && save_tuplecount < 1.0) - instr->firsttuple = instr->counter; - } + instr->running = true; + + /* count the returned tuples */ + instr->tuplecount += nTuples; } /* Add per-node instrumentation to the parent and move into per-query memory context */ @@ -475,6 +468,125 @@ InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add) WalUsageAdd(&dst->instr.walusage, &add->instr.walusage); } +/* + * Specialized handling of instrumented ExecProcNode + * + * These functions are equivalent to running ExecProcNodeReal wrapped in + * InstrStartNode and InstrStopNode, but avoid the conditionals in the hot path + * by checking the instrumentation options when the ExecProcNode pointer gets + * first set, and then using a special-purpose function for each. This results + * in a more optimized set of compiled instructions. + */ + +#include "executor/tuptable.h" +#include "nodes/execnodes.h" + +/* Simplified pop: restore saved state instead of re-deriving from array */ +static inline void +InstrPopStackTo(Instrumentation *prev) +{ + Assert(instr_stack.stack_size > 0); + instr_stack.stack_size--; + instr_stack.current = prev; +} + +static TupleTableSlot * +ExecProcNodeInstrFull(PlanState *node) +{ + NodeInstrumentation *instr = node->instrument; + Instrumentation *prev = instr_stack.current; + TupleTableSlot *result; + + InstrPushStack(&instr->instr); + InstrStartTimer(&instr->instr); + + result = node->ExecProcNodeReal(node); + + InstrStopNodeTimer(instr); + InstrPopStackTo(prev); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +static TupleTableSlot * +ExecProcNodeInstrRowsBuffersWalOnly(PlanState *node) +{ + NodeInstrumentation *instr = node->instrument; + Instrumentation *prev = instr_stack.current; + TupleTableSlot *result; + + InstrPushStack(&instr->instr); + + result = node->ExecProcNodeReal(node); + + InstrPopStackTo(prev); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +static TupleTableSlot * +ExecProcNodeInstrRowsTimerOnly(PlanState *node) +{ + NodeInstrumentation *instr = node->instrument; + TupleTableSlot *result; + + InstrStartTimer(&instr->instr); + + result = node->ExecProcNodeReal(node); + + InstrStopNodeTimer(instr); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +static TupleTableSlot * +ExecProcNodeInstrRowsOnly(PlanState *node) +{ + NodeInstrumentation *instr = node->instrument; + TupleTableSlot *result; + + result = node->ExecProcNodeReal(node); + + instr->running = true; + if (!TupIsNull(result)) + instr->tuplecount += 1.0; + + return result; +} + +/* + * Returns an ExecProcNode wrapper that performs instrumentation calls, + * tailored to the instrumentation options enabled for the node. + */ +ExecProcNodeMtd +InstrNodeSetupExecProcNode(NodeInstrumentation *instr) +{ + bool need_timer = instr->instr.need_timer; + bool need_buf = (instr->instr.need_bufusage || + instr->instr.need_walusage); + + if (need_timer && need_buf) + return ExecProcNodeInstrFull; + else if (need_buf) + return ExecProcNodeInstrRowsBuffersWalOnly; + else if (need_timer) + return ExecProcNodeInstrRowsTimerOnly; + else + return ExecProcNodeInstrRowsOnly; +} + /* Trigger instrumentation handling */ TriggerInstrumentation * InstrAllocTrigger(int n, int instrument_options) diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index cc33b32af1e..ac7f0d21c37 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -288,6 +288,11 @@ extern void InstrUpdateTupleCount(NodeInstrumentation *instr, double nTuples); extern void InstrEndLoop(NodeInstrumentation *instr); extern void InstrAggNode(NodeInstrumentation *dst, NodeInstrumentation *add); +typedef struct TupleTableSlot TupleTableSlot; +typedef struct PlanState PlanState; +typedef TupleTableSlot *(*ExecProcNodeMtd) (PlanState *pstate); +extern ExecProcNodeMtd InstrNodeSetupExecProcNode(NodeInstrumentation *instr); + extern TriggerInstrumentation *InstrAllocTrigger(int n, int instrument_options); extern void InstrStartTrigger(TriggerInstrumentation *tginstr); extern void InstrStopTrigger(TriggerInstrumentation *tginstr, int firings); -- 2.47.1