From 7546f855d138d0dac0d8c22ea5915314810f13e5 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sat, 1 Mar 2025 19:31:30 -0800 Subject: [PATCH v3 2/7] Separate node instrumentation from other use of Instrumentation struct Previously different places (e.g. query "total time") were repurposing the Instrumentation struct initially introduced for capturing per-node statistics during execution. This dual use of the struct is confusing, e.g. by cluttering calls of InstrStartNode/InstrStopNode in unrelated code paths, and prevents future refactorings. Instead, simplify the Instrumentation struct to only track time, WAL/buffer usage, and tuple counts. Similarly, drop the use of InstrEndLoop outside of per-node instrumentation. Introduce the NodeInstrumentation struct to carry forward the per-node instrumentation information. --- contrib/auto_explain/auto_explain.c | 8 +- .../pg_stat_statements/pg_stat_statements.c | 8 +- contrib/postgres_fdw/postgres_fdw.c | 2 +- src/backend/commands/explain.c | 11 +-- src/backend/commands/trigger.c | 8 +- src/backend/executor/execMain.c | 10 +-- src/backend/executor/execParallel.c | 22 +++-- src/backend/executor/execProcnode.c | 4 +- src/backend/executor/instrument.c | 86 +++++++++++++++++-- src/include/executor/instrument.h | 51 ++++++++--- src/include/nodes/execnodes.h | 3 +- 11 files changed, 151 insertions(+), 62 deletions(-) diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c index c10f2fc0f25..ee0c3b4c91b 100644 --- a/contrib/auto_explain/auto_explain.c +++ b/contrib/auto_explain/auto_explain.c @@ -315,7 +315,7 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags) MemoryContext oldcxt; oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false); + queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); MemoryContextSwitchTo(oldcxt); } } @@ -381,12 +381,6 @@ explain_ExecutorEnd(QueryDesc *queryDesc) */ oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - /* - * Make sure stats accumulation is done. (Note: it's okay if several - * levels of hook all do this.) - */ - InstrEndLoop(queryDesc->totaltime); - /* Log plan if duration is exceeded. */ msec = INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total); if (msec >= auto_explain_log_min_duration) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index fe987ceaf40..f43a33b3787 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -1023,7 +1023,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) MemoryContext oldcxt; oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); - queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false); + queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL); MemoryContextSwitchTo(oldcxt); } } @@ -1082,12 +1082,6 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) if (queryId != INT64CONST(0) && queryDesc->totaltime && pgss_enabled(nesting_level)) { - /* - * Make sure stats accumulation is done. (Note: it's okay if several - * levels of hook all do this.) - */ - InstrEndLoop(queryDesc->totaltime); - pgss_store(queryDesc->sourceText, queryId, queryDesc->plannedstmt->stmt_location, diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 456b267f70b..7619ac486c0 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -2778,7 +2778,7 @@ postgresIterateDirectModify(ForeignScanState *node) if (!resultRelInfo->ri_projectReturning) { TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; - Instrumentation *instr = node->ss.ps.instrument; + NodeInstrumentation *instr = node->ss.ps.instrument; Assert(!dmstate->has_returning); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 46c5bf252fc..de66e48366d 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1103,9 +1103,6 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) char *relname; char *conname = NULL; - /* Must clean up instrumentation state */ - InstrEndLoop(instr); - /* * We ignore triggers that were never invoked; they likely aren't * relevant to the current query type. @@ -1136,7 +1133,7 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) appendStringInfo(es->str, " on %s", relname); if (es->timing) appendStringInfo(es->str, ": time=%.3f calls=%.0f\n", - 1000.0 * instr->total, instr->ntuples); + 1000.0 * INSTR_TIME_GET_DOUBLE(instr->total), instr->ntuples); else appendStringInfo(es->str, ": calls=%.0f\n", instr->ntuples); } @@ -1147,7 +1144,7 @@ report_triggers(ResultRelInfo *rInfo, bool show_relname, ExplainState *es) ExplainPropertyText("Constraint Name", conname, es); ExplainPropertyText("Relation", relname, es); if (es->timing) - ExplainPropertyFloat("Time", "ms", 1000.0 * instr->total, 3, + ExplainPropertyFloat("Time", "ms", 1000.0 * INSTR_TIME_GET_DOUBLE(instr->total), 3, es); ExplainPropertyFloat("Calls", NULL, instr->ntuples, 0, es); } @@ -1893,7 +1890,7 @@ ExplainNode(PlanState *planstate, List *ancestors, for (int n = 0; n < w->num_workers; n++) { - Instrumentation *instrument = &w->instrument[n]; + NodeInstrumentation *instrument = &w->instrument[n]; double nloops = instrument->nloops; double startup_ms; double total_ms; @@ -2300,7 +2297,7 @@ ExplainNode(PlanState *planstate, List *ancestors, for (int n = 0; n < w->num_workers; n++) { - Instrumentation *instrument = &w->instrument[n]; + NodeInstrumentation *instrument = &w->instrument[n]; double nloops = instrument->nloops; if (nloops <= 0) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 579ac8d76ae..9b53dd99e99 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2344,7 +2344,7 @@ ExecCallTriggerFunc(TriggerData *trigdata, * If doing EXPLAIN ANALYZE, start charging time to this trigger. */ if (instr) - InstrStartNode(instr + tgindx); + InstrStart(instr + tgindx); /* * Do the function evaluation in the per-tuple memory context, so that @@ -2392,7 +2392,7 @@ ExecCallTriggerFunc(TriggerData *trigdata, * one "tuple returned" (really the number of firings). */ if (instr) - InstrStopNode(instr + tgindx, 1); + InstrStop(instr + tgindx, 1); return (HeapTuple) DatumGetPointer(result); } @@ -4381,7 +4381,7 @@ AfterTriggerExecute(EState *estate, * to include time spent re-fetching tuples in the trigger cost. */ if (instr) - InstrStartNode(instr + tgindx); + InstrStart(instr + tgindx); /* * Fetch the required tuple(s). @@ -4607,7 +4607,7 @@ AfterTriggerExecute(EState *estate, * one "tuple returned" (really the number of firings). */ if (instr) - InstrStopNode(instr + tgindx, 1); + InstrStop(instr + tgindx, 1); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 713e926329c..9bc7b4e20f7 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -331,7 +331,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) - InstrStartNode(queryDesc->totaltime); + InstrStart(queryDesc->totaltime); /* * extract information from the query descriptor and the query feature. @@ -383,7 +383,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, dest->rShutdown(dest); if (queryDesc->totaltime) - InstrStopNode(queryDesc->totaltime, estate->es_processed); + InstrStop(queryDesc->totaltime, estate->es_processed); MemoryContextSwitchTo(oldcontext); } @@ -433,7 +433,7 @@ standard_ExecutorFinish(QueryDesc *queryDesc) /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) - InstrStartNode(queryDesc->totaltime); + InstrStart(queryDesc->totaltime); /* Run ModifyTable nodes to completion */ ExecPostprocessPlan(estate); @@ -443,7 +443,7 @@ standard_ExecutorFinish(QueryDesc *queryDesc) AfterTriggerEndQuery(estate); if (queryDesc->totaltime) - InstrStopNode(queryDesc->totaltime, 0); + InstrStop(queryDesc->totaltime, 0); MemoryContextSwitchTo(oldcontext); @@ -1266,7 +1266,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigWhenExprs = (ExprState **) palloc0(n * sizeof(ExprState *)); if (instrument_options) - resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options, false); + resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options); } else { diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index f098a5557cf..e87810d292e 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -85,7 +85,7 @@ typedef struct FixedParallelExecutorState * instrument_options: Same meaning here as in instrument.c. * * instrument_offset: Offset, relative to the start of this structure, - * of the first Instrumentation object. This will depend on the length of + * of the first NodeInstrumentation object. This will depend on the length of * the plan_node_id array. * * num_workers: Number of workers. @@ -102,11 +102,15 @@ struct SharedExecutorInstrumentation int num_workers; int num_plan_nodes; int plan_node_id[FLEXIBLE_ARRAY_MEMBER]; - /* array of num_plan_nodes * num_workers Instrumentation objects follows */ + + /* + * array of num_plan_nodes * num_workers NodeInstrumentation objects + * follows + */ }; #define GetInstrumentationArray(sei) \ (AssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation *), \ - (Instrumentation *) (((char *) sei) + sei->instrument_offset)) + (NodeInstrumentation *) (((char *) sei) + sei->instrument_offset)) /* Context object for ExecParallelEstimate. */ typedef struct ExecParallelEstimateContext @@ -713,7 +717,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, instrumentation_len = MAXALIGN(instrumentation_len); instrument_offset = instrumentation_len; instrumentation_len += - mul_size(sizeof(Instrumentation), + mul_size(sizeof(NodeInstrumentation), mul_size(e.nnodes, nworkers)); shm_toc_estimate_chunk(&pcxt->estimator, instrumentation_len); shm_toc_estimate_keys(&pcxt->estimator, 1); @@ -799,7 +803,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, */ if (estate->es_instrument) { - Instrumentation *instrument; + NodeInstrumentation *instrument; int i; instrumentation = shm_toc_allocate(pcxt->toc, instrumentation_len); @@ -809,7 +813,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, instrumentation->num_plan_nodes = e.nnodes; instrument = GetInstrumentationArray(instrumentation); for (i = 0; i < nworkers * e.nnodes; ++i) - InstrInit(&instrument[i], estate->es_instrument); + InstrInitNode(&instrument[i], estate->es_instrument); shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, instrumentation); pei->instrumentation = instrumentation; @@ -1036,7 +1040,7 @@ static bool ExecParallelRetrieveInstrumentation(PlanState *planstate, SharedExecutorInstrumentation *instrumentation) { - Instrumentation *instrument; + NodeInstrumentation *instrument; int i; int n; int ibytes; @@ -1064,7 +1068,7 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, * Switch into per-query memory context. */ oldcontext = MemoryContextSwitchTo(planstate->state->es_query_cxt); - ibytes = mul_size(instrumentation->num_workers, sizeof(Instrumentation)); + ibytes = mul_size(instrumentation->num_workers, sizeof(NodeInstrumentation)); planstate->worker_instrument = palloc(ibytes + offsetof(WorkerInstrumentation, instrument)); MemoryContextSwitchTo(oldcontext); @@ -1296,7 +1300,7 @@ ExecParallelReportInstrumentation(PlanState *planstate, { int i; int plan_node_id = planstate->plan->plan_node_id; - Instrumentation *instrument; + NodeInstrumentation *instrument; InstrEndLoop(planstate->instrument); diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f5f9cfbeead..d286471254b 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -413,8 +413,8 @@ ExecInitNode(Plan *node, EState *estate, int eflags) /* Set up instrumentation for this node if requested */ if (estate->es_instrument) - result->instrument = InstrAlloc(1, estate->es_instrument, - result->async_capable); + result->instrument = InstrAllocNode(1, estate->es_instrument, + result->async_capable); return result; } diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 1c92abe6761..1fe0f4204e5 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -26,9 +26,9 @@ static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); static void WalUsageAdd(WalUsage *dst, WalUsage *add); -/* Allocate new instrumentation structure(s) */ +/* General purpose instrumentation handling */ Instrumentation * -InstrAlloc(int n, int instrument_options, bool async_mode) +InstrAlloc(int n, int instrument_options) { Instrumentation *instr; @@ -41,6 +41,74 @@ InstrAlloc(int n, int instrument_options, bool async_mode) bool need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; int i; + for (i = 0; i < n; i++) + { + instr[i].need_bufusage = need_buffers; + instr[i].need_walusage = need_wal; + instr[i].need_timer = need_timer; + } + } + + return instr; +} +void +InstrStart(Instrumentation *instr) +{ + if (instr->need_timer && + !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime)) + elog(ERROR, "InstrStart called twice in a row"); + + if (instr->need_bufusage) + instr->bufusage_start = pgBufferUsage; + + if (instr->need_walusage) + instr->walusage_start = pgWalUsage; +} +void +InstrStop(Instrumentation *instr, double nTuples) +{ + instr_time endtime; + + /* count the specified tuples */ + instr->ntuples += nTuples; + + /* let's update the time only if the timer was requested */ + if (instr->need_timer) + { + if (INSTR_TIME_IS_ZERO(instr->starttime)) + elog(ERROR, "InstrStop called without start"); + + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_ACCUM_DIFF(instr->total, endtime, instr->starttime); + + INSTR_TIME_SET_ZERO(instr->starttime); + } + + /* Add delta of buffer usage since entry to node's totals */ + if (instr->need_bufusage) + BufferUsageAccumDiff(&instr->bufusage, + &pgBufferUsage, &instr->bufusage_start); + + if (instr->need_walusage) + WalUsageAccumDiff(&instr->walusage, + &pgWalUsage, &instr->walusage_start); +} + +/* Allocate new node instrumentation structure(s) */ +NodeInstrumentation * +InstrAllocNode(int n, int instrument_options, bool async_mode) +{ + NodeInstrumentation *instr; + + /* initialize all fields to zeroes, then modify as needed */ + instr = palloc0(n * sizeof(NodeInstrumentation)); + if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL)) + { + bool need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0; + bool need_wal = (instrument_options & INSTRUMENT_WAL) != 0; + bool need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; + int i; + for (i = 0; i < n; i++) { instr[i].need_bufusage = need_buffers; @@ -55,9 +123,9 @@ InstrAlloc(int n, int instrument_options, bool async_mode) /* Initialize a pre-allocated instrumentation structure. */ void -InstrInit(Instrumentation *instr, int instrument_options) +InstrInitNode(NodeInstrumentation * instr, int instrument_options) { - memset(instr, 0, sizeof(Instrumentation)); + memset(instr, 0, sizeof(NodeInstrumentation)); instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0; instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0; instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; @@ -65,7 +133,7 @@ InstrInit(Instrumentation *instr, int instrument_options) /* Entry to a plan node */ void -InstrStartNode(Instrumentation *instr) +InstrStartNode(NodeInstrumentation * instr) { if (instr->need_timer && !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime)) @@ -81,7 +149,7 @@ InstrStartNode(Instrumentation *instr) /* Exit from a plan node */ void -InstrStopNode(Instrumentation *instr, double nTuples) +InstrStopNode(NodeInstrumentation * instr, double nTuples) { double save_tuplecount = instr->tuplecount; instr_time endtime; @@ -129,7 +197,7 @@ InstrStopNode(Instrumentation *instr, double nTuples) /* Update tuple count */ void -InstrUpdateTupleCount(Instrumentation *instr, double nTuples) +InstrUpdateTupleCount(NodeInstrumentation * instr, double nTuples) { /* count the returned tuples */ instr->tuplecount += nTuples; @@ -137,7 +205,7 @@ InstrUpdateTupleCount(Instrumentation *instr, double nTuples) /* Finish a run cycle for a plan node */ void -InstrEndLoop(Instrumentation *instr) +InstrEndLoop(NodeInstrumentation * instr) { /* Skip if nothing has happened, or already shut down */ if (!instr->running) @@ -162,7 +230,7 @@ InstrEndLoop(Instrumentation *instr) /* aggregate instrumentation information */ void -InstrAggNode(Instrumentation *dst, Instrumentation *add) +InstrAggNode(NodeInstrumentation * dst, NodeInstrumentation * add) { if (!dst->running && add->running) { diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index ba5c986907e..1ae533f6704 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -66,7 +66,33 @@ typedef enum InstrumentOption INSTRUMENT_ALL = PG_INT32_MAX } InstrumentOption; +/* + * General purpose instrumentation that can capture time, WAL/buffer usage and tuples + * + * Initialized through InstrAlloc, followed by one or more calls to a pair of + * InstrStart/InstrStop (activity is measured inbetween). + */ typedef struct Instrumentation +{ + /* Parameters set at creation: */ + bool need_timer; /* true if we need timer data */ + bool need_bufusage; /* true if we need buffer usage data */ + bool need_walusage; /* true if we need WAL usage data */ + /* Internal state keeping: */ + instr_time starttime; /* start time of last InstrStart */ + BufferUsage bufusage_start; /* buffer usage at start */ + WalUsage walusage_start; /* WAL usage at start */ + /* Accumulated statistics: */ + instr_time total; /* total runtime */ + double ntuples; /* total tuples counted in InstrStop */ + BufferUsage bufusage; /* total buffer usage */ + WalUsage walusage; /* total WAL usage */ +} Instrumentation; + +/* + * Specialized instrumentation for per-node execution statistics + */ +typedef struct NodeInstrumentation { /* Parameters set at node creation: */ bool need_timer; /* true if we need timer data */ @@ -91,25 +117,30 @@ typedef struct Instrumentation double nfiltered2; /* # of tuples removed by "other" quals */ BufferUsage bufusage; /* total buffer usage */ WalUsage walusage; /* total WAL usage */ -} Instrumentation; +} NodeInstrumentation; typedef struct WorkerInstrumentation { int num_workers; /* # of structures that follow */ - Instrumentation instrument[FLEXIBLE_ARRAY_MEMBER]; + NodeInstrumentation instrument[FLEXIBLE_ARRAY_MEMBER]; } WorkerInstrumentation; extern PGDLLIMPORT BufferUsage pgBufferUsage; extern PGDLLIMPORT WalUsage pgWalUsage; -extern Instrumentation *InstrAlloc(int n, int instrument_options, - bool async_mode); -extern void InstrInit(Instrumentation *instr, int instrument_options); -extern void InstrStartNode(Instrumentation *instr); -extern void InstrStopNode(Instrumentation *instr, double nTuples); -extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples); -extern void InstrEndLoop(Instrumentation *instr); -extern void InstrAggNode(Instrumentation *dst, Instrumentation *add); +extern Instrumentation *InstrAlloc(int n, int instrument_options); +extern void InstrStart(Instrumentation *instr); +extern void InstrStop(Instrumentation *instr, double nTuples); + +extern NodeInstrumentation * InstrAllocNode(int n, int instrument_options, + bool async_mode); +extern void InstrInitNode(NodeInstrumentation * instr, int instrument_options); +extern void InstrStartNode(NodeInstrumentation * instr); +extern void InstrStopNode(NodeInstrumentation * instr, double nTuples); +extern void InstrUpdateTupleCount(NodeInstrumentation * instr, double nTuples); +extern void InstrEndLoop(NodeInstrumentation * instr); +extern void InstrAggNode(NodeInstrumentation * dst, NodeInstrumentation * add); + extern void InstrStartParallelQuery(void); extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage); extern void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index a36653c37f9..eb0b8f835c2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1166,7 +1166,8 @@ typedef struct PlanState ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a * wrapper */ - Instrumentation *instrument; /* Optional runtime stats for this node */ + NodeInstrumentation *instrument; /* Optional runtime stats for this + * node */ WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ /* Per-worker JIT instrumentation */ -- 2.47.1