Make TID Scans recalculate the TIDs less often

Started by David Rowley4 months ago6 messages
#1David Rowley
dgrowleyml@gmail.com
1 attachment(s)

Over on [1]/messages/by-id/4a6268ff-3340-453a-9bf5-c98d51a6f729@app.fastmail.com, there was some concern about having to recalculate the
TID Scan's TidList on every recheck call. This work entails
evaluating all the TID expressions, sorting the resulting list of TIDs
and deduplicating it (so that we don't scan the same TID twice).

As it turns out, it certainly is possible that doing that work could
take up quite a bit of time, and having to do it as often as every
rescan *could* be noticed *if* the list is big enough and the rescans
are frequent enough. The following case demonstrates this:

set max_parallel_Workers_per_gather=0;
set enable_seqscan=0;
set enable_material=0;
set jit=0;
select sum(c) from million m left join lateral (select count(*) c from
empty where ctid in
('(1,1)','(1,2)','(1,3)','(1,4)','(1,5)','(1,6)','(1,7)','(1,8)','(1,9)','(1,10)','(1,11)','(1,12)','(1,13)','(1,14)','(1,15)','(1,16)','(1,17)','(1,18)','(1,19)','(1,20)','(1,21)','(1,22)','(1,23)','(1,24)','(1,25)','(1,26)','(1,27)','(1,28)','(1,29)','(1,30)','(1,31)','(1,32)','(1,33)','(1,34)','(1,35)','(1,36)','(1,37)','(1,38)','(1,39)','(1,40)','(1,41)','(1,42)','(1,43)','(1,44)','(1,45)','(1,46)','(1,47)','(1,48)','(1,49)','(1,50)','(1,51)','(1,52)','(1,53)','(1,54)','(1,55)','(1,56)','(1,57)','(1,58)','(1,59)','(1,60)','(1,61)','(1,62)','(1,63)','(1,64)','(1,65)','(1,66)','(1,67)','(1,68)','(1,69)','(1,70)','(1,71)','(1,72)','(1,73)','(1,74)','(1,75)','(1,76)','(1,77)','(1,78)','(1,79)','(1,80)','(1,81)','(1,82)','(1,83)','(1,84)','(1,85)','(1,86)','(1,87)','(1,88)','(1,89)','(1,90)','(1,91)','(1,92)','(1,93)','(1,94)','(1,95)','(1,96)','(1,97)','(1,98)','(1,99)','(1,100)'))
on 1=1;

master:

Time: 613.541 ms
Time: 621.037 ms
Time: 623.430 ms

patched:

Time: 298.863 ms
Time: 298.015 ms
Time: 297.172 ms

The part I don't know is if it's at all likely that someone would ever
hit this. We've added TID scan, so filtering on ctid must be common
enough to warrant having that code (yes, I know it's required for
WHERE CURRENT OF too), I just don't know how common rescans are in
those queries.

The patch optimises the recalc by changing things so the recalc is
only done when a parameter has changed that's mentioned somewhere in
TID quals. If no such parameter has changed, we use the same list as
we did on the last scan.

Does anyone think this is worth pursuing further?

Patch attached.

David

[1]: /messages/by-id/4a6268ff-3340-453a-9bf5-c98d51a6f729@app.fastmail.com

Attachments:

v1-0001-Reduce-rescan-overheads-in-TID-Range-Scan.patchapplication/octet-stream; name=v1-0001-Reduce-rescan-overheads-in-TID-Range-Scan.patchDownload
From 4713fd08586baaf3143fa714933b53d6e62aa937 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Mon, 15 Sep 2025 21:59:36 +1200
Subject: [PATCH v1] Reduce rescan overheads in TID [Range] Scan

Until how the TIDs to scan have been recalculated after every rescan of
both TID Scan and TID Range Scan.  Here we adjust things so that the
calculated list of TIDs or calculated range of TIDs is only marked as
needing recalculated whenever a parameter has changed that might require
a recalculation.
---
 src/backend/executor/nodeTidrangescan.c | 59 ++++++++++++++++++-------
 src/backend/executor/nodeTidscan.c      | 17 +++++--
 src/backend/optimizer/plan/createplan.c | 25 ++++++++---
 src/include/nodes/execnodes.h           |  4 ++
 src/include/nodes/plannodes.h           |  4 ++
 5 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 1bce8d6cbfe..39239c6692b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -128,14 +128,17 @@ TidExprListCreate(TidRangeScanState *tidrangestate)
  *		TidRangeEval
  *
  *		Compute and set node's block and offset range to scan by evaluating
- *		node->trss_tidexprs.  Returns false if we detect the range cannot
- *		contain any tuples.  Returns true if it's possible for the range to
- *		contain tuples.  We don't bother validating that trss_mintid is less
- *		than or equal to trss_maxtid, as the scan_set_tidrange() table AM
- *		function will handle that.
+ *		node->trss_tidexprs.  Sets node's trss_rangeIsEmpty to true if the
+ *		calculated range must be empty of any tuples, otherwise sets
+ *		trss_rangeIsEmpty to false and sets trss_mintid and trss_maxtid to the
+ *		calculated range.
+ *
+ *		We don't bother validating that trss_mintid is less than or equal to
+ *		trss_maxtid, as the scan_set_tidrange() table AM function will handle
+ *		that.
  * ----------------------------------------------------------------
  */
-static bool
+static void
 TidRangeEval(TidRangeScanState *node)
 {
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
@@ -165,7 +168,10 @@ TidRangeEval(TidRangeScanState *node)
 
 		/* If the bound is NULL, *nothing* matches the qual. */
 		if (isNull)
-			return false;
+		{
+			node->trss_rangeIsEmpty = true;
+			return;
+		}
 
 		if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
 		{
@@ -207,7 +213,7 @@ TidRangeEval(TidRangeScanState *node)
 	ItemPointerCopy(&lowerBound, &node->trss_mintid);
 	ItemPointerCopy(&upperBound, &node->trss_maxtid);
 
-	return true;
+	node->trss_rangeIsEmpty = false;
 }
 
 /* ----------------------------------------------------------------
@@ -234,12 +240,19 @@ TidRangeNext(TidRangeScanState *node)
 	slot = node->ss.ss_ScanTupleSlot;
 	direction = estate->es_direction;
 
-	if (!node->trss_inScan)
+	/* First time through, compute TID range to scan */
+	if (!node->trss_rangeCalcDone)
 	{
-		/* First time through, compute TID range to scan */
-		if (!TidRangeEval(node))
-			return NULL;
+		TidRangeEval(node);
+		node->trss_rangeCalcDone = true;
+	}
+
+	/* Check if the range was detected not to contain any tuples */
+	if (node->trss_rangeIsEmpty)
+		return NULL;
 
+	if (!node->trss_inScan)
+	{
 		if (scandesc == NULL)
 		{
 			scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
@@ -274,13 +287,18 @@ TidRangeNext(TidRangeScanState *node)
 static bool
 TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
 {
-	if (!TidRangeEval(node))
-		return false;
+	/* First call? Compute the TID Range */
+	if (!node->trss_rangeCalcDone)
+	{
+		TidRangeEval(node);
+		node->trss_rangeCalcDone = true;
+	}
 
 	Assert(ItemPointerIsValid(&slot->tts_tid));
 
 	/* Recheck the ctid is still within range */
-	if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
+	if (node->trss_rangeIsEmpty ||
+		ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
 		ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0)
 		return false;
 
@@ -322,6 +340,13 @@ ExecReScanTidRangeScan(TidRangeScanState *node)
 	/* mark scan as not in progress, and tid range list as not computed yet */
 	node->trss_inScan = false;
 
+	/*
+	 * Set the Tid Range to be recalculated when any parameters that might
+	 * effect the calculated range have changed.
+	 */
+	if (bms_overlap(node->ss.ps.chgParam, ((TidRangeScan *) node->ss.ps.plan)->tidparamids))
+		node->trss_rangeCalcDone = false;
+
 	/*
 	 * We must wait until TidRangeNext before calling table_rescan_tidrange.
 	 */
@@ -380,6 +405,10 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
 	 * mark scan as not in progress, and TID range as not computed yet
 	 */
 	tidrangestate->trss_inScan = false;
+	tidrangestate->trss_rangeCalcDone = false;
+
+	/* This will be calculated correctly in TidRangeEval() */
+	tidrangestate->trss_rangeIsEmpty = true;
 
 	/*
 	 * open the scan relation
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index d50c6600358..b40a31e0d2f 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -457,10 +457,19 @@ ExecTidScan(PlanState *pstate)
 void
 ExecReScanTidScan(TidScanState *node)
 {
-	if (node->tss_TidList)
-		pfree(node->tss_TidList);
-	node->tss_TidList = NULL;
-	node->tss_NumTids = 0;
+	/*
+	 * Set the TidList to be recalculated when any parameters that might
+	 * effect the computed list has changed.
+	 */
+	if (bms_overlap(node->ss.ps.chgParam,
+					 ((TidScan *) node->ss.ps.plan)->tidparamids))
+	{
+		if (node->tss_TidList)
+			pfree(node->tss_TidList);
+		node->tss_TidList = NULL;
+		node->tss_NumTids = 0;
+	}
+
 	node->tss_TidPtr = -1;
 
 	/* not really necessary, but seems good form */
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6791cbeb416..87e2597ee8c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -201,9 +201,10 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
 											List *bitmapqualorig,
 											Index scanrelid);
 static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
-							 List *tidquals);
+							 List *tidquals, Bitmapset *paramids);
 static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual,
-									   Index scanrelid, List *tidrangequals);
+									   Index scanrelid, List *tidrangequals,
+									   Bitmapset *paramids);
 static SubqueryScan *make_subqueryscan(List *qptlist,
 									   List *qpqual,
 									   Index scanrelid,
@@ -3384,6 +3385,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
 	TidScan    *scan_plan;
 	Index		scan_relid = best_path->path.parent->relid;
 	List	   *tidquals = best_path->tidquals;
+	Bitmapset  *paramids;
 
 	/* it should be a base rel... */
 	Assert(scan_relid > 0);
@@ -3459,10 +3461,13 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
 			replace_nestloop_params(root, (Node *) scan_clauses);
 	}
 
+	paramids = pull_paramids((Expr *) tidquals);
+
 	scan_plan = make_tidscan(tlist,
 							 scan_clauses,
 							 scan_relid,
-							 tidquals);
+							 tidquals,
+							 paramids);
 
 	copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
 
@@ -3481,6 +3486,7 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
 	TidRangeScan *scan_plan;
 	Index		scan_relid = best_path->path.parent->relid;
 	List	   *tidrangequals = best_path->tidrangequals;
+	Bitmapset  *paramids;
 
 	/* it should be a base rel... */
 	Assert(scan_relid > 0);
@@ -3524,10 +3530,13 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
 			replace_nestloop_params(root, (Node *) scan_clauses);
 	}
 
+	paramids = pull_paramids((Expr *) tidrangequals);
+
 	scan_plan = make_tidrangescan(tlist,
 								  scan_clauses,
 								  scan_relid,
-								  tidrangequals);
+								  tidrangequals,
+								  paramids);
 
 	copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
 
@@ -5622,7 +5631,8 @@ static TidScan *
 make_tidscan(List *qptlist,
 			 List *qpqual,
 			 Index scanrelid,
-			 List *tidquals)
+			 List *tidquals,
+			 Bitmapset *paramids)
 {
 	TidScan    *node = makeNode(TidScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5633,6 +5643,7 @@ make_tidscan(List *qptlist,
 	plan->righttree = NULL;
 	node->scan.scanrelid = scanrelid;
 	node->tidquals = tidquals;
+	node->tidparamids = paramids;
 
 	return node;
 }
@@ -5641,7 +5652,8 @@ static TidRangeScan *
 make_tidrangescan(List *qptlist,
 				  List *qpqual,
 				  Index scanrelid,
-				  List *tidrangequals)
+				  List *tidrangequals,
+				  Bitmapset *paramids)
 {
 	TidRangeScan *node = makeNode(TidRangeScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5652,6 +5664,7 @@ make_tidrangescan(List *qptlist,
 	plan->righttree = NULL;
 	node->scan.scanrelid = scanrelid;
 	node->tidrangequals = tidrangequals;
+	node->tidparamids = paramids;
 
 	return node;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 3a920cc7d17..a5abe176aef 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1924,6 +1924,8 @@ typedef struct TidScanState
  *		trss_mintid			the lowest TID in the scan range
  *		trss_maxtid			the highest TID in the scan range
  *		trss_inScan			is a scan currently in progress?
+ *		trss_rangeCalcDone	has the TID range been calculated yet?
+ *		trss_rangeIsEmpty	true if the TID range is certainly empty
  * ----------------
  */
 typedef struct TidRangeScanState
@@ -1933,6 +1935,8 @@ typedef struct TidRangeScanState
 	ItemPointerData trss_mintid;
 	ItemPointerData trss_maxtid;
 	bool		trss_inScan;
+	bool		trss_rangeCalcDone;
+	bool		trss_rangeIsEmpty;
 } TidRangeScanState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 29d7732d6a0..a3d220e9d5e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -677,6 +677,8 @@ typedef struct TidScan
 	Scan		scan;
 	/* qual(s) involving CTID = something */
 	List	   *tidquals;
+	/* Set of ParamIds mentioned in tidquals */
+	Bitmapset  *tidparamids;
 } TidScan;
 
 /* ----------------
@@ -691,6 +693,8 @@ typedef struct TidRangeScan
 	Scan		scan;
 	/* qual(s) involving CTID op something */
 	List	   *tidrangequals;
+	/* Set of ParamIds mentioned in tidrangequals */
+	Bitmapset *tidparamids;
 } TidRangeScan;
 
 /* ----------------
-- 
2.43.0

#2Andrey Borodin
x4mmm@yandex-team.ru
In reply to: David Rowley (#1)
Re: Make TID Scans recalculate the TIDs less often

On 17 Sep 2025, at 09:59, David Rowley <dgrowleyml@gmail.com> wrote:

Does anyone think this is worth pursuing further?

I heard of following use-case: data transferring system partition big tables by ctid ranges to mimic parallel secscan, but with many network connections. Some performance improvement was claimed and connection failure resistance (when one connection was broken only one partition is rescanned with same snapshot).

I'm not entirely sure this is a safe approach (I have a gut feeling that tid scan is not MVCC safe). But for retrieving very big tables without strong guarantees this might make some sense.

Would your patch improve performance of such case?

Best regards, Andrey Borodin.

#3David Rowley
dgrowleyml@gmail.com
In reply to: Andrey Borodin (#2)
Re: Make TID Scans recalculate the TIDs less often

On Wed, 17 Sept 2025 at 18:29, Andrey Borodin <x4mmm@yandex-team.ru> wrote:

I heard of following use-case: data transferring system partition big tables by ctid ranges to mimic parallel secscan, but with many network connections. Some performance improvement was claimed and connection failure resistance (when one connection was broken only one partition is rescanned with same snapshot).

Would your patch improve performance of such case?

I suspect they're just running a SELECT * to a single table "WHERE
ctid BETWEEN" some fixed range of TIDs. If that's the case then this
won't help as there are no rescans, therefore the TID Range is only
calculated once. Also, I imagine TID Range Scans are less affected
than TID Scans simply because TidExprListCreate() is more complex than
TidRangeEval().

What you'd need for this to ever be slow is lots of rescans, so
something like TID Scan on the inside of a nested loop. If you look at
ExecReScanTidScan() you'll see it pfrees the tss_TidList, which
requires that the list gets built all over again on the next call to
TidNext(). It's the call to TidListEval() that is potentially
expensive due to the expression evaluation, memory allocation, sorting
and distinct work.

David

#4Andrey Borodin
x4mmm@yandex-team.ru
In reply to: David Rowley (#3)
Re: Make TID Scans recalculate the TIDs less often

On 17 Sep 2025, at 14:51, David Rowley <dgrowleyml@gmail.com> wrote:

What you'd need for this to ever be slow is lots of rescans, so
something like TID Scan on the inside of a nested loop. If you look at
ExecReScanTidScan() you'll see it pfrees the tss_TidList, which
requires that the list gets built all over again on the next call to
TidNext(). It's the call to TidListEval() that is potentially
expensive due to the expression evaluation, memory allocation, sorting
and distinct work.

Occasionally (when dealing with corruption) I do stuff like

begin;
update public.tablename set description = description where ctid in (select ('('||b.blkno::text||','||(x::text)||')')::tid from generate_series(1,300) x, blocks b);

in some forms they are actually joins. Also, pageinspecting things out is always a join (CTAS a copy of table rows that have particular infomask bits). But, fortunately, it's not that frequent case. It's always "plumbing", not a "regular database usage".

Best regards, Andrey Borodin.

#5David Rowley
dgrowleyml@gmail.com
In reply to: Andrey Borodin (#4)
Re: Make TID Scans recalculate the TIDs less often

On Wed, 17 Sept 2025 at 22:13, Andrey Borodin <x4mmm@yandex-team.ru> wrote:

Occasionally (when dealing with corruption) I do stuff like

begin;
update public.tablename set description = description where ctid in (select ('('||b.blkno::text||','||(x::text)||')')::tid from generate_series(1,300) x, blocks b);

in some forms they are actually joins. Also, pageinspecting things out is always a join (CTAS a copy of table rows that have particular infomask bits). But, fortunately, it's not that frequent case. It's always "plumbing", not a "regular database usage".

Thanks for sharing that one. If that UPDATE did do a Nested Loop join
with a TID Scan on the inner side, the optimisation I have in the
patch *wouldn't* be applied as a parameter is changing that genuinely
does need the TidList to be recalculated over again.

David

#6Chao Li
li.evan.chao@gmail.com
In reply to: David Rowley (#5)
2 attachment(s)
Re: Make TID Scans recalculate the TIDs less often

On Sep 17, 2025, at 18:31, David Rowley <dgrowleyml@gmail.com> wrote:

On Wed, 17 Sept 2025 at 22:13, Andrey Borodin <x4mmm@yandex-team.ru> wrote:

Occasionally (when dealing with corruption) I do stuff like

begin;
update public.tablename set description = description where ctid in (select
('('||b.blkno::text||','||(x::text)||')')::tid from generate_series(1,300)
x, blocks b);

in some forms they are actually joins. Also, pageinspecting things out is
always a join (CTAS a copy of table rows that have particular infomask
bits). But, fortunately, it's not that frequent case. It's always
"plumbing", not a "regular database usage".

Thanks for sharing that one. If that UPDATE did do a Nested Loop join
with a TID Scan on the inner side, the optimisation I have in the
patch *wouldn't* be applied as a parameter is changing that genuinely
does need the TidList to be recalculated over again.

David

I tried to trace this patch again today.

With David’s example with “million" and “empty" tables, TidScan.tidparamids
is NULL, so that in ExecRescanTidScan(), bms_overlap() will return false,
and TidList is not free-ed.

Same thing for Andrey’s example.

Based on David’s example, I build this SQL:

```
# Insert tuples to empty first
select sum(c) from million m left join lateral (select a c from empty e
where e.ctid = m.ctid) on 1=1;
```

With this SQL, outer scan passes a parameter to inter TidScan, so that
“tidparamids” is not NULL now. Then I noticed one thing: now it needs to
re-evaluate TidList. However, the new TidList always has the same length of
the old TidList, so that we don’t need to free the old TidList, instead, we
can actually reuse memory of the old TidList, which could be a small
optimization.

For that, I created 0002. But TBH, with 0002, and with psql timing on, I
don’t see obvious improvement wrt execution time.

Best regards,
--
Chao Li (Evan)
HighGo Software Co., Ltd.
https://www.highgo.com/

Attachments:

v2-0001-Reduce-rescan-overheads-in-TID-Range-Scan.patchapplication/octet-stream; name=v2-0001-Reduce-rescan-overheads-in-TID-Range-Scan.patchDownload
From d2e9335de2d32ea17c5540839b3257bf7a836304 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Mon, 15 Sep 2025 21:59:36 +1200
Subject: [PATCH v2 1/2] Reduce rescan overheads in TID [Range] Scan

Until how the TIDs to scan have been recalculated after every rescan of
both TID Scan and TID Range Scan.  Here we adjust things so that the
calculated list of TIDs or calculated range of TIDs is only marked as
needing recalculated whenever a parameter has changed that might require
a recalculation.
---
 src/backend/executor/nodeTidrangescan.c | 59 ++++++++++++++++++-------
 src/backend/executor/nodeTidscan.c      | 17 +++++--
 src/backend/optimizer/plan/createplan.c | 25 ++++++++---
 src/include/nodes/execnodes.h           |  4 ++
 src/include/nodes/plannodes.h           |  4 ++
 5 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 1bce8d6cbfe..39239c6692b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -128,14 +128,17 @@ TidExprListCreate(TidRangeScanState *tidrangestate)
  *		TidRangeEval
  *
  *		Compute and set node's block and offset range to scan by evaluating
- *		node->trss_tidexprs.  Returns false if we detect the range cannot
- *		contain any tuples.  Returns true if it's possible for the range to
- *		contain tuples.  We don't bother validating that trss_mintid is less
- *		than or equal to trss_maxtid, as the scan_set_tidrange() table AM
- *		function will handle that.
+ *		node->trss_tidexprs.  Sets node's trss_rangeIsEmpty to true if the
+ *		calculated range must be empty of any tuples, otherwise sets
+ *		trss_rangeIsEmpty to false and sets trss_mintid and trss_maxtid to the
+ *		calculated range.
+ *
+ *		We don't bother validating that trss_mintid is less than or equal to
+ *		trss_maxtid, as the scan_set_tidrange() table AM function will handle
+ *		that.
  * ----------------------------------------------------------------
  */
-static bool
+static void
 TidRangeEval(TidRangeScanState *node)
 {
 	ExprContext *econtext = node->ss.ps.ps_ExprContext;
@@ -165,7 +168,10 @@ TidRangeEval(TidRangeScanState *node)
 
 		/* If the bound is NULL, *nothing* matches the qual. */
 		if (isNull)
-			return false;
+		{
+			node->trss_rangeIsEmpty = true;
+			return;
+		}
 
 		if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
 		{
@@ -207,7 +213,7 @@ TidRangeEval(TidRangeScanState *node)
 	ItemPointerCopy(&lowerBound, &node->trss_mintid);
 	ItemPointerCopy(&upperBound, &node->trss_maxtid);
 
-	return true;
+	node->trss_rangeIsEmpty = false;
 }
 
 /* ----------------------------------------------------------------
@@ -234,12 +240,19 @@ TidRangeNext(TidRangeScanState *node)
 	slot = node->ss.ss_ScanTupleSlot;
 	direction = estate->es_direction;
 
-	if (!node->trss_inScan)
+	/* First time through, compute TID range to scan */
+	if (!node->trss_rangeCalcDone)
 	{
-		/* First time through, compute TID range to scan */
-		if (!TidRangeEval(node))
-			return NULL;
+		TidRangeEval(node);
+		node->trss_rangeCalcDone = true;
+	}
+
+	/* Check if the range was detected not to contain any tuples */
+	if (node->trss_rangeIsEmpty)
+		return NULL;
 
+	if (!node->trss_inScan)
+	{
 		if (scandesc == NULL)
 		{
 			scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
@@ -274,13 +287,18 @@ TidRangeNext(TidRangeScanState *node)
 static bool
 TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
 {
-	if (!TidRangeEval(node))
-		return false;
+	/* First call? Compute the TID Range */
+	if (!node->trss_rangeCalcDone)
+	{
+		TidRangeEval(node);
+		node->trss_rangeCalcDone = true;
+	}
 
 	Assert(ItemPointerIsValid(&slot->tts_tid));
 
 	/* Recheck the ctid is still within range */
-	if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
+	if (node->trss_rangeIsEmpty ||
+		ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
 		ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0)
 		return false;
 
@@ -322,6 +340,13 @@ ExecReScanTidRangeScan(TidRangeScanState *node)
 	/* mark scan as not in progress, and tid range list as not computed yet */
 	node->trss_inScan = false;
 
+	/*
+	 * Set the Tid Range to be recalculated when any parameters that might
+	 * effect the calculated range have changed.
+	 */
+	if (bms_overlap(node->ss.ps.chgParam, ((TidRangeScan *) node->ss.ps.plan)->tidparamids))
+		node->trss_rangeCalcDone = false;
+
 	/*
 	 * We must wait until TidRangeNext before calling table_rescan_tidrange.
 	 */
@@ -380,6 +405,10 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
 	 * mark scan as not in progress, and TID range as not computed yet
 	 */
 	tidrangestate->trss_inScan = false;
+	tidrangestate->trss_rangeCalcDone = false;
+
+	/* This will be calculated correctly in TidRangeEval() */
+	tidrangestate->trss_rangeIsEmpty = true;
 
 	/*
 	 * open the scan relation
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index d50c6600358..b40a31e0d2f 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -457,10 +457,19 @@ ExecTidScan(PlanState *pstate)
 void
 ExecReScanTidScan(TidScanState *node)
 {
-	if (node->tss_TidList)
-		pfree(node->tss_TidList);
-	node->tss_TidList = NULL;
-	node->tss_NumTids = 0;
+	/*
+	 * Set the TidList to be recalculated when any parameters that might
+	 * effect the computed list has changed.
+	 */
+	if (bms_overlap(node->ss.ps.chgParam,
+					 ((TidScan *) node->ss.ps.plan)->tidparamids))
+	{
+		if (node->tss_TidList)
+			pfree(node->tss_TidList);
+		node->tss_TidList = NULL;
+		node->tss_NumTids = 0;
+	}
+
 	node->tss_TidPtr = -1;
 
 	/* not really necessary, but seems good form */
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6791cbeb416..87e2597ee8c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -201,9 +201,10 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
 											List *bitmapqualorig,
 											Index scanrelid);
 static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
-							 List *tidquals);
+							 List *tidquals, Bitmapset *paramids);
 static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual,
-									   Index scanrelid, List *tidrangequals);
+									   Index scanrelid, List *tidrangequals,
+									   Bitmapset *paramids);
 static SubqueryScan *make_subqueryscan(List *qptlist,
 									   List *qpqual,
 									   Index scanrelid,
@@ -3384,6 +3385,7 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
 	TidScan    *scan_plan;
 	Index		scan_relid = best_path->path.parent->relid;
 	List	   *tidquals = best_path->tidquals;
+	Bitmapset  *paramids;
 
 	/* it should be a base rel... */
 	Assert(scan_relid > 0);
@@ -3459,10 +3461,13 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
 			replace_nestloop_params(root, (Node *) scan_clauses);
 	}
 
+	paramids = pull_paramids((Expr *) tidquals);
+
 	scan_plan = make_tidscan(tlist,
 							 scan_clauses,
 							 scan_relid,
-							 tidquals);
+							 tidquals,
+							 paramids);
 
 	copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
 
@@ -3481,6 +3486,7 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
 	TidRangeScan *scan_plan;
 	Index		scan_relid = best_path->path.parent->relid;
 	List	   *tidrangequals = best_path->tidrangequals;
+	Bitmapset  *paramids;
 
 	/* it should be a base rel... */
 	Assert(scan_relid > 0);
@@ -3524,10 +3530,13 @@ create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
 			replace_nestloop_params(root, (Node *) scan_clauses);
 	}
 
+	paramids = pull_paramids((Expr *) tidrangequals);
+
 	scan_plan = make_tidrangescan(tlist,
 								  scan_clauses,
 								  scan_relid,
-								  tidrangequals);
+								  tidrangequals,
+								  paramids);
 
 	copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
 
@@ -5622,7 +5631,8 @@ static TidScan *
 make_tidscan(List *qptlist,
 			 List *qpqual,
 			 Index scanrelid,
-			 List *tidquals)
+			 List *tidquals,
+			 Bitmapset *paramids)
 {
 	TidScan    *node = makeNode(TidScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5633,6 +5643,7 @@ make_tidscan(List *qptlist,
 	plan->righttree = NULL;
 	node->scan.scanrelid = scanrelid;
 	node->tidquals = tidquals;
+	node->tidparamids = paramids;
 
 	return node;
 }
@@ -5641,7 +5652,8 @@ static TidRangeScan *
 make_tidrangescan(List *qptlist,
 				  List *qpqual,
 				  Index scanrelid,
-				  List *tidrangequals)
+				  List *tidrangequals,
+				  Bitmapset *paramids)
 {
 	TidRangeScan *node = makeNode(TidRangeScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5652,6 +5664,7 @@ make_tidrangescan(List *qptlist,
 	plan->righttree = NULL;
 	node->scan.scanrelid = scanrelid;
 	node->tidrangequals = tidrangequals;
+	node->tidparamids = paramids;
 
 	return node;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 3a920cc7d17..a5abe176aef 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1924,6 +1924,8 @@ typedef struct TidScanState
  *		trss_mintid			the lowest TID in the scan range
  *		trss_maxtid			the highest TID in the scan range
  *		trss_inScan			is a scan currently in progress?
+ *		trss_rangeCalcDone	has the TID range been calculated yet?
+ *		trss_rangeIsEmpty	true if the TID range is certainly empty
  * ----------------
  */
 typedef struct TidRangeScanState
@@ -1933,6 +1935,8 @@ typedef struct TidRangeScanState
 	ItemPointerData trss_mintid;
 	ItemPointerData trss_maxtid;
 	bool		trss_inScan;
+	bool		trss_rangeCalcDone;
+	bool		trss_rangeIsEmpty;
 } TidRangeScanState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 29d7732d6a0..a3d220e9d5e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -677,6 +677,8 @@ typedef struct TidScan
 	Scan		scan;
 	/* qual(s) involving CTID = something */
 	List	   *tidquals;
+	/* Set of ParamIds mentioned in tidquals */
+	Bitmapset  *tidparamids;
 } TidScan;
 
 /* ----------------
@@ -691,6 +693,8 @@ typedef struct TidRangeScan
 	Scan		scan;
 	/* qual(s) involving CTID op something */
 	List	   *tidrangequals;
+	/* Set of ParamIds mentioned in tidrangequals */
+	Bitmapset *tidparamids;
 } TidRangeScan;
 
 /* ----------------
-- 
2.39.5 (Apple Git-154)

v2-0002-Avoid-memory-alloc-and-free-in-TidScan.patchapplication/octet-stream; name=v2-0002-Avoid-memory-alloc-and-free-in-TidScan.patchDownload
From 7d0114a57244f82f61e0db43a5a58a524a666f52 Mon Sep 17 00:00:00 2001
From: "Chao Li (Evan)" <lic@highgo.com>
Date: Thu, 18 Sep 2025 15:22:08 +0800
Subject: [PATCH v2 2/2] Avoid memory alloc and free in TidScan

When TidScan needs to rebuild TidList, most of time the rebuilt
TidList may have the same length as the existing TidList. So that
we can reuse memory of existing TidList, this way we avoid freeing
memory and re-allocate memory.

Author: Chao Li <lic@highgo.go>
---
 src/backend/executor/nodeTidscan.c | 53 +++++++++++++++++-------------
 src/include/nodes/execnodes.h      |  1 +
 2 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index b40a31e0d2f..7e9b8f9743b 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -157,8 +157,17 @@ TidListEval(TidScanState *tidstate)
 	 * ScalarArrayOpExprs, we may have to enlarge the array.
 	 */
 	numAllocTids = list_length(tidstate->tss_tidexprs);
-	tidList = (ItemPointerData *)
-		palloc(numAllocTids * sizeof(ItemPointerData));
+	if (numAllocTids > tidstate->tss_MaxNumTids)
+	{
+		if (tidstate->tss_TidList != NULL)
+			tidstate->tss_TidList = (ItemPointerData *)
+				repalloc(tidstate->tss_TidList, numAllocTids * sizeof(ItemPointerData));
+		else
+			tidstate->tss_TidList = (ItemPointerData *)
+				palloc(numAllocTids * sizeof(ItemPointerData));
+		tidstate->tss_MaxNumTids = numAllocTids;
+	}
+	tidList = tidstate->tss_TidList;
 	numTids = 0;
 
 	foreach(l, tidstate->tss_tidexprs)
@@ -186,12 +195,13 @@ TidListEval(TidScanState *tidstate)
 			if (!table_tuple_tid_valid(scan, itemptr))
 				continue;
 
-			if (numTids >= numAllocTids)
+			if (numTids >= tidstate->tss_MaxNumTids)
 			{
-				numAllocTids *= 2;
-				tidList = (ItemPointerData *)
-					repalloc(tidList,
-							 numAllocTids * sizeof(ItemPointerData));
+				tidstate->tss_MaxNumTids <<= 1;
+				tidstate->tss_TidList = (ItemPointerData *)
+					repalloc(tidstate->tss_TidList,
+							 tidstate->tss_MaxNumTids * sizeof(ItemPointerData));
+				tidList = tidstate->tss_TidList;
 			}
 			tidList[numTids++] = *itemptr;
 		}
@@ -211,12 +221,12 @@ TidListEval(TidScanState *tidstate)
 				continue;
 			itemarray = DatumGetArrayTypeP(arraydatum);
 			deconstruct_array_builtin(itemarray, TIDOID, &ipdatums, &ipnulls, &ndatums);
-			if (numTids + ndatums > numAllocTids)
+			if (numTids + ndatums >= tidstate->tss_MaxNumTids)
 			{
-				numAllocTids = numTids + ndatums;
-				tidList = (ItemPointerData *)
-					repalloc(tidList,
-							 numAllocTids * sizeof(ItemPointerData));
+				tidstate->tss_MaxNumTids = numTids + ndatums;
+				tidstate->tss_TidList = (ItemPointerData *)
+					repalloc(tidstate->tss_TidList, tidstate->tss_MaxNumTids * sizeof(ItemPointerData));
+				tidList = tidstate->tss_TidList;
 			}
 			for (i = 0; i < ndatums; i++)
 			{
@@ -242,12 +252,12 @@ TidListEval(TidScanState *tidstate)
 							  RelationGetRelid(tidstate->ss.ss_currentRelation),
 							  &cursor_tid))
 			{
-				if (numTids >= numAllocTids)
+				if (numTids >= tidstate->tss_MaxNumTids)
 				{
-					numAllocTids *= 2;
+					tidstate->tss_MaxNumTids <<= 1;
 					tidList = (ItemPointerData *)
 						repalloc(tidList,
-								 numAllocTids * sizeof(ItemPointerData));
+								 tidstate->tss_MaxNumTids * sizeof(ItemPointerData));
 				}
 				tidList[numTids++] = cursor_tid;
 			}
@@ -271,7 +281,6 @@ TidListEval(TidScanState *tidstate)
 						  itemptr_comparator);
 	}
 
-	tidstate->tss_TidList = tidList;
 	tidstate->tss_NumTids = numTids;
 	tidstate->tss_TidPtr = -1;
 }
@@ -333,7 +342,7 @@ TidNext(TidScanState *node)
 	/*
 	 * First time through, compute the list of TIDs to be visited
 	 */
-	if (node->tss_TidList == NULL)
+	if (node->tss_NumTids < 0)
 		TidListEval(node);
 
 	scan = node->ss.ss_currentScanDesc;
@@ -408,7 +417,7 @@ TidRecheck(TidScanState *node, TupleTableSlot *slot)
 	if (node->tss_isCurrentOf)
 		return true;
 
-	if (node->tss_TidList == NULL)
+	if (node->tss_NumTids == 0)
 		TidListEval(node);
 
 	/*
@@ -464,10 +473,8 @@ ExecReScanTidScan(TidScanState *node)
 	if (bms_overlap(node->ss.ps.chgParam,
 					 ((TidScan *) node->ss.ps.plan)->tidparamids))
 	{
-		if (node->tss_TidList)
-			pfree(node->tss_TidList);
-		node->tss_TidList = NULL;
-		node->tss_NumTids = 0;
+		/* indicate that the TidList must be recalculated */
+		node->tss_NumTids = -1;
 	}
 
 	node->tss_TidPtr = -1;
@@ -529,7 +536,7 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
 	 * mark tid list as not computed yet
 	 */
 	tidstate->tss_TidList = NULL;
-	tidstate->tss_NumTids = 0;
+	tidstate->tss_NumTids = -1;
 	tidstate->tss_TidPtr = -1;
 
 	/*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a5abe176aef..db3f62cf3fe 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1913,6 +1913,7 @@ typedef struct TidScanState
 	List	   *tss_tidexprs;
 	bool		tss_isCurrentOf;
 	int			tss_NumTids;
+	int			tss_MaxNumTids;	/* allocated size of TidList */
 	int			tss_TidPtr;
 	ItemPointerData *tss_TidList;
 } TidScanState;
-- 
2.39.5 (Apple Git-154)