Add memory/disk usage for WindowAgg nodes in EXPLAIN

Started by Tatsuo Ishiiover 1 year ago53 messages
#1Tatsuo Ishii
ishii@postgresql.org
1 attachment(s)

1eff8279d4 added memory/disk usage for materialize nodes in EXPLAIN
ANALYZE.
In the commit message:

There are a few other executor node types that use tuplestores, so we
could also consider adding these details to the EXPLAIN ANALYZE for
those nodes too.

So I wanted to Add memory/disk usage for WindowAgg. Patch attached.

Since WindowAgg node could create multiple tuplestore for each Window
partition, we need to track each tuplestore storage usage so that the
maximum storage usage is determined. For this purpose I added new
fields to the WindowAggState.
--
Tatsuo Ishii
SRA OSS LLC
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v1-0001-Add-memory-disk-usage-for-WindowAgg-nodes-in-EXPL.patchtext/x-patch; charset=us-asciiDownload
From 00c7546659e305be45dbeb13a14bcde5066be76f Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Sat, 6 Jul 2024 19:48:30 +0900
Subject: [PATCH v1] Add memory/disk usage for WindowAgg nodes in EXPLAIN.

---
 src/backend/commands/explain.c       | 37 ++++++++++++++++++++++++++++
 src/backend/executor/nodeWindowAgg.c | 19 ++++++++++++++
 src/include/nodes/execnodes.h        |  2 ++
 3 files changed, 58 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 1e80fd8b68..177687ea80 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -126,6 +126,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
+static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2215,6 +2216,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
+			show_windowagg_info(castNode(WindowAggState, planstate), es);
 			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
@@ -3362,6 +3364,41 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	}
 }
 
+/*
+ * Show information on WindowAgg node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_windowagg_info(WindowAggState *winstate, ExplainState *es)
+{
+	const char *storageType;
+	int64		spaceUsedKB;
+
+	/*
+	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
+	 * get as far as creating the tuplestore.
+	 */
+	if (!es->analyze || winstate->storageType == NULL)
+		return;
+
+	storageType = winstate->storageType;
+	spaceUsedKB = BYTES_TO_KILOBYTES(winstate->storageSize);
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", storageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 storageType,
+						 spaceUsedKB);
+	}
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 3221fa1522..bcfe144511 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -1360,7 +1360,23 @@ release_partition(WindowAggState *winstate)
 	}
 
 	if (winstate->buffer)
+	{
+		int64	spaceUsed = tuplestore_space_used(winstate->buffer);
+
+		/*
+		 * We want to track the max mem/disk usage so that we can use the info
+		 * in EXPLAIN (ANALYZE).
+		 */
+		if (spaceUsed > winstate->storageSize)
+		{
+			if (winstate->storageType != NULL)
+				pfree(winstate->storageType);
+			winstate->storageType =
+				pstrdup(tuplestore_storage_type_name(winstate->buffer));
+			winstate->storageSize = spaceUsed;
+		}
 		tuplestore_end(winstate->buffer);
+	}
 	winstate->buffer = NULL;
 	winstate->partition_spooled = false;
 }
@@ -2671,6 +2687,9 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
 	winstate->partition_spooled = false;
 	winstate->more_partitions = false;
 
+	winstate->storageType = NULL;
+	winstate->storageSize = 0;
+
 	return winstate;
 }
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b62c96f206..7a3dfa2bc3 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2567,6 +2567,8 @@ typedef struct WindowAggState
 	ExprState  *partEqfunction; /* equality funcs for partition columns */
 	ExprState  *ordEqfunction;	/* equality funcs for ordering columns */
 	Tuplestorestate *buffer;	/* stores rows of current partition */
+	int64		storageSize;	/* max storage size in buffer */
+	char		*storageType;	/* the storage type above */
 	int			current_ptr;	/* read pointer # for current row */
 	int			framehead_ptr;	/* read pointer # for frame head, if used */
 	int			frametail_ptr;	/* read pointer # for frame tail, if used */
-- 
2.25.1

#2David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#1)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Sat, 6 Jul 2024 at 23:23, Tatsuo Ishii <ishii@postgresql.org> wrote:

So I wanted to Add memory/disk usage for WindowAgg. Patch attached.

Thanks for working on that.

Since WindowAgg node could create multiple tuplestore for each Window
partition, we need to track each tuplestore storage usage so that the
maximum storage usage is determined. For this purpose I added new
fields to the WindowAggState.

I'd recently been looking at the code that recreates the tuplestore
for each partition and thought we could do a bit better. In [1]/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com, I
proposed a patch to make this better.

If you based your patch on [1]/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com, maybe a better way of doing this is
having tuplestore.c track the maximum space used on disk in an extra
field which is updated with tuplestore_clear(). It's probably ok to
update a new field called maxDiskSpace in tuplestore_clear() if
state->status != TSS_INMEM. If the tuplestore went to disk then an
extra call to BufFileSize() isn't going to be noticeable, even in
cases where we only just went over work_mem. You could then adjust
tuplestore_space_used() to look at maxDiskSpace and return that value
if it's larger than BufFileSize(state->myfile) and state->maxSpace.
You could check if maxDiskSpace == 0 to determine if the tuplestore
has ever gone to disk. tuplestore_storage_type_name() would also need
to check maxDiskSpace and return "Disk" if that's non-zero.

David

[1]: /messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com

#3Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#2)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Sat, 6 Jul 2024 at 23:23, Tatsuo Ishii <ishii@postgresql.org> wrote:

So I wanted to Add memory/disk usage for WindowAgg. Patch attached.

Thanks for working on that.

Thank you for the infrastructure you created in tuplestore.c and explain.c.

BTW, it seems these executor nodes (other than Materialize and Window
Aggregate node) use tuplestore for their own purpose.

CTE Scan
Recursive Union
Table Function Scan

I have already implemented that for CTE Scan. Do you think other two
nodes are worth to add the information? I think for consistency sake,
it will better to add the info Recursive Union and Table Function
Scan.

Since WindowAgg node could create multiple tuplestore for each Window
partition, we need to track each tuplestore storage usage so that the
maximum storage usage is determined. For this purpose I added new
fields to the WindowAggState.

I'd recently been looking at the code that recreates the tuplestore
for each partition and thought we could do a bit better. In [1], I
proposed a patch to make this better.

If you based your patch on [1], maybe a better way of doing this is
having tuplestore.c track the maximum space used on disk in an extra
field which is updated with tuplestore_clear(). It's probably ok to
update a new field called maxDiskSpace in tuplestore_clear() if
state->status != TSS_INMEM. If the tuplestore went to disk then an
extra call to BufFileSize() isn't going to be noticeable, even in
cases where we only just went over work_mem. You could then adjust
tuplestore_space_used() to look at maxDiskSpace and return that value
if it's larger than BufFileSize(state->myfile) and state->maxSpace.
You could check if maxDiskSpace == 0 to determine if the tuplestore
has ever gone to disk. tuplestore_storage_type_name() would also need
to check maxDiskSpace and return "Disk" if that's non-zero.

Thank you for the suggestion. Yes, I noticed [1] and once it is
committed, I will start to study tuplestore.c in this direction.

Best reagards,
--
Tatsuo Ishii
SRA OSS LLC
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#4David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#3)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Tue, 9 Jul 2024 at 14:44, Tatsuo Ishii <ishii@postgresql.org> wrote:

BTW, it seems these executor nodes (other than Materialize and Window
Aggregate node) use tuplestore for their own purpose.

CTE Scan
Recursive Union
Table Function Scan

I have already implemented that for CTE Scan. Do you think other two
nodes are worth to add the information?

Yes, I think so. I'd keep each as a separate patch so they can be
considered independently. Doing all of them should hopefully ensure we
strike the right balance of what code to put in explain.c and what
code to put in tuplestore.c. I think the WindowAgg's tuplestore usage
pattern might show that the API I picked isn't well suited when a
tuplestore is cleared and refilled over and over.

David

#5Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: David Rowley (#4)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Tue, Jul 9, 2024 at 8:20 AM David Rowley <dgrowleyml@gmail.com> wrote:

On Tue, 9 Jul 2024 at 14:44, Tatsuo Ishii <ishii@postgresql.org> wrote:

BTW, it seems these executor nodes (other than Materialize and Window
Aggregate node) use tuplestore for their own purpose.

CTE Scan
Recursive Union
Table Function Scan

I have already implemented that for CTE Scan. Do you think other two
nodes are worth to add the information?

Yes, I think so. I'd keep each as a separate patch so they can be
considered independently. Doing all of them should hopefully ensure we
strike the right balance of what code to put in explain.c and what
code to put in tuplestore.c.

+1

+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainPropertyText("Storage", storageType, es);
+ ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es);
+ }
+ else
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str,
+ "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+ storageType,
+ spaceUsedKB);
+ }

It will be good to move this code to a function which will be called
by show_*_info functions(). We might even convert it into a tuplestore
specific implementation hook after David's work.

--
Best Wishes,
Ashutosh Bapat

#6Tatsuo Ishii
ishii@postgresql.org
In reply to: Ashutosh Bapat (#5)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Yes, I think so. I'd keep each as a separate patch so they can be
considered independently. Doing all of them should hopefully ensure we
strike the right balance of what code to put in explain.c and what
code to put in tuplestore.c.

+1

+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainPropertyText("Storage", storageType, es);
+ ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es);
+ }
+ else
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str,
+ "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+ storageType,
+ spaceUsedKB);
+ }

It will be good to move this code to a function which will be called
by show_*_info functions().

I have already implemented that in this direction in my working in
progress patch:

/*
* Show information regarding storage method and maximum memory/disk space
* used.
*/
static void
show_storage_info(Tuplestorestate *tupstore, ExplainState *es)

Which can be shared by Material and CTE scan node. I am going to post
it after I take care Recursive Union and Table Function Scan node.

We might even convert it into a tuplestore
specific implementation hook after David's work.

Best reagards,
--
Tatsuo Ishii
SRA OSS LLC
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#7Tatsuo Ishii
ishii@postgresql.org
In reply to: Tatsuo Ishii (#6)
6 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Yes, I think so. I'd keep each as a separate patch so they can be
considered independently. Doing all of them should hopefully ensure we
strike the right balance of what code to put in explain.c and what
code to put in tuplestore.c.

+1

+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainPropertyText("Storage", storageType, es);
+ ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es);
+ }
+ else
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str,
+ "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+ storageType,
+ spaceUsedKB);
+ }

It will be good to move this code to a function which will be called
by show_*_info functions().

I have already implemented that in this direction in my working in
progress patch:

Attached are the v2 patches. As suggested by David, I split them
into multiple patches so that each patch implements the feature for
each node. You need to apply the patches in the order of patch number
(if you want to apply all of them, "git apply v2-*.patch" should
work).

v2-0001-Refactor-show_material_info.patch:
This refactors show_material_info(). The guts are moved to new
show_storage_info() so that it can be shared by not only Materialized
node.

v2-0002-Add-memory-disk-usage-for-CTE-Scan-nodes-in-EXPLA.patch:
This adds memory/disk usage for CTE Scan nodes in EXPLAIN (ANALYZE) command.

v2-0003-Add-memory-disk-usage-for-Table-Function-Scan-nod.patch:
This adds memory/disk usage for Table Function Scan nodes in EXPLAIN (ANALYZE) command.

v2-0004-Add-memory-disk-usage-for-Recursive-Union-nodes-i.patch:
This adds memory/disk usage for Recursive Union nodes in EXPLAIN
(ANALYZE) command. Also show_storage_info() is changed so that it
accepts int64 storage_used, char *storage_type arguments. They are
used if the target node uses multiple tuplestores, in case a simple
call to tuplestore_space_used() does not work. Such executor nodes
need to collect storage_used while running the node. This type of node
includes Recursive Union and Window Aggregate.

v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This
adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE)
command. Note that if David's proposal
/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com
is committed, this will need to be adjusted.

For a demonstration, how storage/memory usage is shown in EXPLAIN
(notice "Storage: Memory Maximum Storage: 120kB" etc.). The script
used is attached (test.sql.txt). The SQLs are shamelessly copied from
David's example and the regression test (some of them were modified by
me).

EXPLAIN (ANALYZE, COSTS OFF)
SELECT count(t1.b) FROM (VALUES(1),(2)) t2(x) LEFT JOIN (SELECT * FROM t1 WHERE a <= 100) t1 ON TRUE;
QUERY PLAN
---------------------------------------------------------------------------------
Aggregate (actual time=0.345..0.346 rows=1 loops=1)
-> Nested Loop Left Join (actual time=0.015..0.330 rows=200 loops=1)
-> Values Scan on "*VALUES*" (actual time=0.001..0.003 rows=2 loops=1)
-> Materialize (actual time=0.006..0.152 rows=100 loops=2)
Storage: Memory Maximum Storage: 120kB
-> Seq Scan on t1 (actual time=0.007..0.213 rows=100 loops=1)
Filter: (a <= 100)
Rows Removed by Filter: 900
Planning Time: 0.202 ms
Execution Time: 0.377 ms
(10 rows)

-- CTE Scan node
EXPLAIN (ANALYZE, COSTS OFF)
WITH RECURSIVE t(n) AS (
VALUES (1)
UNION ALL
SELECT n+1 FROM t WHERE n < 100
)
SELECT sum(n) OVER() FROM t;
QUERY PLAN
-----------------------------------------------------------------------------------
WindowAgg (actual time=0.151..0.169 rows=100 loops=1)
Storage: Memory Maximum Storage: 20kB
CTE t
-> Recursive Union (actual time=0.001..0.105 rows=100 loops=1)
Storage: Memory Maximum Storage: 17kB
-> Result (actual time=0.001..0.001 rows=1 loops=1)
-> WorkTable Scan on t t_1 (actual time=0.000..0.000 rows=1 loops=100)
Filter: (n < 100)
Rows Removed by Filter: 0
-> CTE Scan on t (actual time=0.002..0.127 rows=100 loops=1)
Storage: Memory Maximum Storage: 20kB
Planning Time: 0.053 ms
Execution Time: 0.192 ms
(13 rows)

-- Table Function Scan node
CREATE OR REPLACE VIEW public.jsonb_table_view6 AS
SELECT js2,
jsb2w,
jsb2q,
ia,
ta,
jba
FROM JSON_TABLE(
'null'::jsonb, '$[*]' AS json_table_path_0
PASSING
1 + 2 AS a,
'"foo"'::json AS "b c"
COLUMNS (
js2 json PATH '$' WITHOUT WRAPPER KEEP QUOTES,
jsb2w jsonb PATH '$' WITH UNCONDITIONAL WRAPPER KEEP QUOTES,
jsb2q jsonb PATH '$' WITHOUT WRAPPER OMIT QUOTES,
ia integer[] PATH '$' WITHOUT WRAPPER KEEP QUOTES,
ta text[] PATH '$' WITHOUT WRAPPER KEEP QUOTES,
jba jsonb[] PATH '$' WITHOUT WRAPPER KEEP QUOTES
)
);
CREATE VIEW
EXPLAIN (ANALYZE, COSTS OFF) SELECT * FROM jsonb_table_view6;
QUERY PLAN
-------------------------------------------------------------------------------
Table Function Scan on "json_table" (actual time=0.024..0.025 rows=1 loops=1)
Storage: Memory Maximum Storage: 17kB
Planning Time: 0.100 ms
Execution Time: 0.054 ms
(4 rows)

Attachments:

v2-0001-Refactor-show_material_info.patchtext/x-patch; charset=us-asciiDownload
From 1d2f67dff48601f92b5378838c0e908d200d4493 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 10 Jul 2024 14:02:13 +0900
Subject: [PATCH v2 1/5] Refactor show_material_info().

---
 src/backend/commands/explain.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 118db12903..43ef33295e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -125,6 +125,7 @@ static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
+static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
@@ -3326,13 +3327,11 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 }
 
 /*
- * Show information on material node, storage method and maximum memory/disk
- * space used.
+ * Show information on storage method and maximum memory/disk space used.
  */
 static void
-show_material_info(MaterialState *mstate, ExplainState *es)
+show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
 {
-	Tuplestorestate *tupstore = mstate->tuplestorestate;
 	const char *storageType;
 	int64		spaceUsedKB;
 
@@ -3361,6 +3360,18 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	}
 }
 
+/*
+ * Show information on material node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_material_info(MaterialState *mstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = mstate->tuplestorestate;
+
+	show_storage_info(tupstore, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
-- 
2.25.1

v2-0002-Add-memory-disk-usage-for-CTE-Scan-nodes-in-EXPLA.patchtext/x-patch; charset=us-asciiDownload
From e4eb2288c90e92a42ba85c672d477b1a93ebf6f8 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 10 Jul 2024 14:09:46 +0900
Subject: [PATCH v2 2/5] Add memory/disk usage for CTE Scan nodes in EXPLAIN

---
 src/backend/commands/explain.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 43ef33295e..661e9101cb 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -127,6 +127,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
+static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2028,6 +2029,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			if (IsA(plan, CteScan))
+				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
 		case T_Gather:
 			{
@@ -3372,6 +3375,18 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	show_storage_info(tupstore, es);
 }
 
+/*
+ * Show information on CTE Scan node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
+
+	show_storage_info(tupstore, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
-- 
2.25.1

v2-0003-Add-memory-disk-usage-for-Table-Function-Scan-nod.patchtext/x-patch; charset=us-asciiDownload
From a7bfaf711b5a1ecb4fe58d3b46db1911383dbfb8 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 10 Jul 2024 14:32:59 +0900
Subject: [PATCH v2 3/5] Add memory/disk usage for Table Function Scan nodes in
 EXPLAIN

---
 src/backend/commands/explain.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 661e9101cb..1a4e83ad38 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -128,6 +128,7 @@ static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
+static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2112,6 +2113,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			show_table_func_can_info(castNode(TableFuncScanState, planstate), es);
 			break;
 		case T_TidScan:
 			{
@@ -3387,6 +3389,18 @@ show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
 	show_storage_info(tupstore, es);
 }
 
+/*
+ * Show information on Table Function Scan node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = tscanstate->tupstore;
+
+	show_storage_info(tupstore, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
-- 
2.25.1

v2-0004-Add-memory-disk-usage-for-Recursive-Union-nodes-i.patchtext/x-patch; charset=us-asciiDownload
From b03264d55d0c23885fe9ce4f9d93a6ca65d45261 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 10 Jul 2024 16:06:59 +0900
Subject: [PATCH v2 4/5] Add memory/disk usage for Recursive Union nodes in
 EXPLAIN

---
 src/backend/commands/explain.c            | 47 +++++++++++++++++------
 src/backend/executor/nodeRecursiveunion.c | 30 +++++++++++++++
 src/include/nodes/execnodes.h             |  2 +
 3 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 1a4e83ad38..054d909093 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -125,10 +125,12 @@ static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
-static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
+static void show_storage_info(Tuplestorestate *tupstore, int64 storage_used, char *storage_type,
+							  ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
 static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es);
+static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2264,6 +2266,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_memoize_info(castNode(MemoizeState, planstate), ancestors,
 							  es);
 			break;
+		case T_RecursiveUnion:
+			show_recursive_union_info(castNode(RecursiveUnionState, planstate), es);
+			break;
 		default:
 			break;
 	}
@@ -3332,23 +3337,32 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 }
 
 /*
- * Show information on storage method and maximum memory/disk space used.
+ * Show information on storage method and maximum memory/disk space used.  If
+ * tupstore is NULL, then storage_used and storage_type are used instead.
  */
 static void
-show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
+show_storage_info(Tuplestorestate *tupstore, int64 storage_used, char *storage_type,
+				  ExplainState *es)
 {
 	const char *storageType;
 	int64		spaceUsedKB;
 
 	/*
-	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
-	 * get as far as creating the tuplestore.
+	 * Nothing to show if ANALYZE option wasn't used.
 	 */
-	if (!es->analyze || tupstore == NULL)
+	if (!es->analyze)
 		return;
 
-	storageType = tuplestore_storage_type_name(tupstore);
-	spaceUsedKB = BYTES_TO_KILOBYTES(tuplestore_space_used(tupstore));
+	if (tupstore != NULL)
+	{
+		storageType = tuplestore_storage_type_name(tupstore);
+		spaceUsedKB = BYTES_TO_KILOBYTES(tuplestore_space_used(tupstore));
+	}
+	else
+	{
+		storageType = storage_type;
+		spaceUsedKB = BYTES_TO_KILOBYTES(storage_used);
+	}
 
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
@@ -3374,7 +3388,7 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 {
 	Tuplestorestate *tupstore = mstate->tuplestorestate;
 
-	show_storage_info(tupstore, es);
+	show_storage_info(tupstore, 0, NULL, es);
 }
 
 /*
@@ -3386,7 +3400,7 @@ show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
 {
 	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
 
-	show_storage_info(tupstore, es);
+	show_storage_info(tupstore, 0, NULL, es);
 }
 
 /*
@@ -3398,7 +3412,18 @@ show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es)
 {
 	Tuplestorestate *tupstore = tscanstate->tupstore;
 
-	show_storage_info(tupstore, es);
+	show_storage_info(tupstore, 0, NULL, es);
+}
+
+/*
+ * Show information on Recursive Union node, storage method and maximum
+ * memory/disk space used.
+ */
+
+static void
+show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es)
+{
+	show_storage_info(NULL, rstate->storageSize, rstate->storageType, es);
 }
 
 /*
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index c7f8a19fa4..8667b7ca93 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -52,6 +52,33 @@ build_hash_table(RecursiveUnionState *rustate)
 												false);
 }
 
+/*
+ * Track the maximum memory/disk usage in working_table and
+ * intermediate_table.  Supposed to be called just before tuplestore_end.
+ */
+static void
+track_storage_usage(RecursiveUnionState *state, Tuplestorestate *tup)
+{
+	int64	spaceUsed;
+
+	if (tup == NULL)
+		return;
+
+	spaceUsed = tuplestore_space_used(tup);
+
+	/*
+	 * We want to track the maximum mem/disk usage so that we can use the info
+	 * in EXPLAIN (ANALYZE).
+	 */
+	if (spaceUsed > state->storageSize)
+	{
+		if (state->storageType != NULL)
+			pfree(state->storageType);
+		state->storageType =
+			pstrdup(tuplestore_storage_type_name(tup));
+		state->storageSize = spaceUsed;
+	}
+}
 
 /* ----------------------------------------------------------------
  *		ExecRecursiveUnion(node)
@@ -120,6 +147,7 @@ ExecRecursiveUnion(PlanState *pstate)
 				break;
 
 			/* done with old working table ... */
+			track_storage_usage(node, node->working_table);
 			tuplestore_end(node->working_table);
 
 			/* intermediate table becomes working table */
@@ -191,6 +219,8 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
 	rustate->intermediate_empty = true;
 	rustate->working_table = tuplestore_begin_heap(false, false, work_mem);
 	rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem);
+	rustate->storageSize = 0;
+	rustate->storageType = NULL;
 
 	/*
 	 * If hashing, we need a per-tuple memory context for comparisons, and a
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index cac684d9b3..bc2c0baed6 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1509,6 +1509,8 @@ typedef struct RecursiveUnionState
 	bool		intermediate_empty;
 	Tuplestorestate *working_table;
 	Tuplestorestate *intermediate_table;
+	int64		storageSize;	/* max storage size Tuplestore */
+	char		*storageType;	/* the storage type above */
 	/* Remaining fields are unused in UNION ALL case */
 	Oid		   *eqfuncoids;		/* per-grouping-field equality fns */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
-- 
2.25.1

v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patchtext/x-patch; charset=us-asciiDownload
From b61edc12210eb71c8a7a987171e8bbf39299989c Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 10 Jul 2024 16:22:41 +0900
Subject: [PATCH v2 5/5] Add memory/disk usage for Window Aggregate nodes in
 EXPLAIN

---
 src/backend/commands/explain.c       | 12 ++++++++++++
 src/backend/executor/nodeWindowAgg.c | 19 +++++++++++++++++++
 src/include/nodes/execnodes.h        |  2 ++
 3 files changed, 33 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 054d909093..5f32c967e1 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -131,6 +131,7 @@ static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
 static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es);
 static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es);
+static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2222,6 +2223,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
+			show_windowagg_info(castNode(WindowAggState, planstate), es);
 			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
@@ -3426,6 +3428,16 @@ show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es)
 	show_storage_info(NULL, rstate->storageSize, rstate->storageType, es);
 }
 
+/*
+ * Show information on WindowAgg node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_windowagg_info(WindowAggState *winstate, ExplainState *es)
+{
+	show_storage_info(NULL, winstate->storageSize, winstate->storageType, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
index 3221fa1522..bcfe144511 100644
--- a/src/backend/executor/nodeWindowAgg.c
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -1360,7 +1360,23 @@ release_partition(WindowAggState *winstate)
 	}
 
 	if (winstate->buffer)
+	{
+		int64	spaceUsed = tuplestore_space_used(winstate->buffer);
+
+		/*
+		 * We want to track the max mem/disk usage so that we can use the info
+		 * in EXPLAIN (ANALYZE).
+		 */
+		if (spaceUsed > winstate->storageSize)
+		{
+			if (winstate->storageType != NULL)
+				pfree(winstate->storageType);
+			winstate->storageType =
+				pstrdup(tuplestore_storage_type_name(winstate->buffer));
+			winstate->storageSize = spaceUsed;
+		}
 		tuplestore_end(winstate->buffer);
+	}
 	winstate->buffer = NULL;
 	winstate->partition_spooled = false;
 }
@@ -2671,6 +2687,9 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
 	winstate->partition_spooled = false;
 	winstate->more_partitions = false;
 
+	winstate->storageType = NULL;
+	winstate->storageSize = 0;
+
 	return winstate;
 }
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index bc2c0baed6..82a597aae9 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2596,6 +2596,8 @@ typedef struct WindowAggState
 	ExprState  *partEqfunction; /* equality funcs for partition columns */
 	ExprState  *ordEqfunction;	/* equality funcs for ordering columns */
 	Tuplestorestate *buffer;	/* stores rows of current partition */
+	int64		storageSize;	/* max storage size in buffer */
+	char		*storageType;	/* the storage type above */
 	int			current_ptr;	/* read pointer # for current row */
 	int			framehead_ptr;	/* read pointer # for frame head, if used */
 	int			frametail_ptr;	/* read pointer # for frame tail, if used */
-- 
2.25.1

test.sql.txttext/plain; charset=us-asciiDownload
#8Maxim Orlov
orlovmg@gmail.com
In reply to: Tatsuo Ishii (#7)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi!

+1 for the idea of the patch. Consider it useful.

I looked at the patch set and don't see any obvious defects. It applies
without any problems and looks pretty good for me.
Only one thing is left to do. Add basic tests for the added functionality
to make it committable. For example, as in the
mentioned 1eff8279d494b9.

--
Best regards,
Maxim Orlov.

#9Tatsuo Ishii
ishii@postgresql.org
In reply to: Maxim Orlov (#8)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi!

+1 for the idea of the patch. Consider it useful.

I looked at the patch set and don't see any obvious defects. It applies
without any problems and looks pretty good for me.

Thank you for reviewing my patch.

Only one thing is left to do. Add basic tests for the added functionality
to make it committable. For example, as in the
mentioned 1eff8279d494b9.

Agreed. Probably add to explain.sql?

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#10Maxim Orlov
orlovmg@gmail.com
In reply to: Tatsuo Ishii (#9)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Wed, 4 Sept 2024 at 03:07, Tatsuo Ishii <ishii@postgresql.org> wrote:

Agreed. Probably add to explain.sql?

Yeah, I think this is an appropriate place.

--
Best regards,
Maxim Orlov.

#11jian he
jian.universality@gmail.com
In reply to: Tatsuo Ishii (#7)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Wed, Jul 10, 2024 at 5:36 PM Tatsuo Ishii <ishii@postgresql.org> wrote:

Attached are the v2 patches. As suggested by David, I split them
into multiple patches so that each patch implements the feature for
each node. You need to apply the patches in the order of patch number
(if you want to apply all of them, "git apply v2-*.patch" should
work).

v2-0001-Refactor-show_material_info.patch:
This refactors show_material_info(). The guts are moved to new
show_storage_info() so that it can be shared by not only Materialized
node.

v2-0002-Add-memory-disk-usage-for-CTE-Scan-nodes-in-EXPLA.patch:
This adds memory/disk usage for CTE Scan nodes in EXPLAIN (ANALYZE) command.

v2-0003-Add-memory-disk-usage-for-Table-Function-Scan-nod.patch:
This adds memory/disk usage for Table Function Scan nodes in EXPLAIN (ANALYZE) command.

v2-0004-Add-memory-disk-usage-for-Recursive-Union-nodes-i.patch:
This adds memory/disk usage for Recursive Union nodes in EXPLAIN
(ANALYZE) command. Also show_storage_info() is changed so that it
accepts int64 storage_used, char *storage_type arguments. They are
used if the target node uses multiple tuplestores, in case a simple
call to tuplestore_space_used() does not work. Such executor nodes
need to collect storage_used while running the node. This type of node
includes Recursive Union and Window Aggregate.

v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This
adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE)
command. Note that if David's proposal
/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com
is committed, this will need to be adjusted.

For a demonstration, how storage/memory usage is shown in EXPLAIN
(notice "Storage: Memory Maximum Storage: 120kB" etc.). The script
used is attached (test.sql.txt). The SQLs are shamelessly copied from
David's example and the regression test (some of them were modified by
me).

hi. I can roughly understand it.

I have one minor issue with the comment.

typedef struct RecursiveUnionState
{
PlanState ps; /* its first field is NodeTag */
bool recursing;
bool intermediate_empty;
Tuplestorestate *working_table;
Tuplestorestate *intermediate_table;
int64 storageSize; /* max storage size Tuplestore */
char *storageType; /* the storage type above */
....
}

"/* the storage type above */"
is kind of ambiguous, since there is more than one Tuplestorestate.

i think it roughly means: the storage type of working_table
while the max storage of working_table.

typedef struct WindowAggState
{
ScanState ss; /* its first field is NodeTag */

/* these fields are filled in by ExecInitExpr: */
List *funcs; /* all WindowFunc nodes in targetlist */
int numfuncs; /* total number of window functions */
int numaggs; /* number that are plain aggregates */

WindowStatePerFunc perfunc; /* per-window-function information */
WindowStatePerAgg peragg; /* per-plain-aggregate information */
ExprState *partEqfunction; /* equality funcs for partition columns */
ExprState *ordEqfunction; /* equality funcs for ordering columns */
Tuplestorestate *buffer; /* stores rows of current partition */
int64 storageSize; /* max storage size in buffer */
char *storageType; /* the storage type above */
}

" /* the storage type above */"
I think it roughly means:
" the storage type of WindowAggState->buffer while the max storage of
WindowAggState->buffer".

#12David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#7)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Wed, 10 Jul 2024 at 21:36, Tatsuo Ishii <ishii@postgresql.org> wrote:

v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This
adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE)
command. Note that if David's proposal
/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com
is committed, this will need to be adjusted.

Hi,

I pushed the changes to WindowAgg so as not to call tuplestore_end()
on every partition. Can you rebase this patch over that change?

It would be good to do this in a way that does not add any new state
to WindowAggState, you can see that I had to shuffle fields around in
that struct because the next_parition field would have caused the
struct to become larger. I've not looked closely, but I expect this
can be done by adding more code to tuplestore_updatemax() to also
track the disk space used if the current storage has gone to disk. I
expect the maxSpace field can be used for both, but we'd need another
bool field to track if the max used was by disk or memory.

I think the performance of this would also need to be tested as it
means doing an lseek() on every tuplestore_clear() when we've gone to
disk. Probably that will be dominated by all the other overheads of a
tuplestore going to disk (i.e. dumptuples() etc), but it would be good
to check this. I suggest setting work_mem = 64 and making a test case
that only just spills to disk. Maybe do a few thousand partitions
worth of that and see if you can measure any slowdown.

David

#13Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#12)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi,

On Wed, 10 Jul 2024 at 21:36, Tatsuo Ishii <ishii@postgresql.org> wrote:

v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This
adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE)
command. Note that if David's proposal
/messages/by-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com
is committed, this will need to be adjusted.

Hi,

I pushed the changes to WindowAgg so as not to call tuplestore_end()
on every partition. Can you rebase this patch over that change?

It would be good to do this in a way that does not add any new state
to WindowAggState, you can see that I had to shuffle fields around in
that struct because the next_parition field would have caused the
struct to become larger. I've not looked closely, but I expect this
can be done by adding more code to tuplestore_updatemax() to also
track the disk space used if the current storage has gone to disk. I
expect the maxSpace field can be used for both, but we'd need another
bool field to track if the max used was by disk or memory.

I think the performance of this would also need to be tested as it
means doing an lseek() on every tuplestore_clear() when we've gone to
disk. Probably that will be dominated by all the other overheads of a
tuplestore going to disk (i.e. dumptuples() etc), but it would be good
to check this. I suggest setting work_mem = 64 and making a test case
that only just spills to disk. Maybe do a few thousand partitions
worth of that and see if you can measure any slowdown.

Thank you for the suggestion. I will look into this.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#14Tatsuo Ishii
ishii@postgresql.org
In reply to: jian he (#11)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi,

hi. I can roughly understand it.

I have one minor issue with the comment.

typedef struct RecursiveUnionState
{
PlanState ps; /* its first field is NodeTag */
bool recursing;
bool intermediate_empty;
Tuplestorestate *working_table;
Tuplestorestate *intermediate_table;
int64 storageSize; /* max storage size Tuplestore */
char *storageType; /* the storage type above */
....
}

"/* the storage type above */"
is kind of ambiguous, since there is more than one Tuplestorestate.

i think it roughly means: the storage type of working_table
while the max storage of working_table.

typedef struct WindowAggState
{
ScanState ss; /* its first field is NodeTag */

/* these fields are filled in by ExecInitExpr: */
List *funcs; /* all WindowFunc nodes in targetlist */
int numfuncs; /* total number of window functions */
int numaggs; /* number that are plain aggregates */

WindowStatePerFunc perfunc; /* per-window-function information */
WindowStatePerAgg peragg; /* per-plain-aggregate information */
ExprState *partEqfunction; /* equality funcs for partition columns */
ExprState *ordEqfunction; /* equality funcs for ordering columns */
Tuplestorestate *buffer; /* stores rows of current partition */
int64 storageSize; /* max storage size in buffer */
char *storageType; /* the storage type above */
}

" /* the storage type above */"
I think it roughly means:
" the storage type of WindowAggState->buffer while the max storage of
WindowAggState->buffer".

Thank you for looking into my patch. Unfortunately I need to work on
other issue before adjusting the comments because the fields might go
away if I change the tuplestore infrastructure per David's suggestion:
/messages/by-id/CAApHDvoY8cibGcicLV0fNh=9JVx9PANcWvhkdjBnDCc9Quqytg@mail.gmail.com

After this I will rebase the patches. This commit requires changes.
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=908a968612f9ed61911d8ca0a185b262b82f1269

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#15Tatsuo Ishii
ishii@postgresql.org
In reply to: Tatsuo Ishii (#13)
3 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi David,

I pushed the changes to WindowAgg so as not to call tuplestore_end()
on every partition. Can you rebase this patch over that change?

It would be good to do this in a way that does not add any new state
to WindowAggState, you can see that I had to shuffle fields around in
that struct because the next_parition field would have caused the
struct to become larger. I've not looked closely, but I expect this
can be done by adding more code to tuplestore_updatemax() to also
track the disk space used if the current storage has gone to disk. I
expect the maxSpace field can be used for both, but we'd need another
bool field to track if the max used was by disk or memory.

I have created a patch in the direction you suggested. See attached
patch (v1-0001-Enhance-tuplestore.txt). To not confuse CFbot, the
extension is "txt", not "patch".

I think the performance of this would also need to be tested as it
means doing an lseek() on every tuplestore_clear() when we've gone to
disk. Probably that will be dominated by all the other overheads of a
tuplestore going to disk (i.e. dumptuples() etc), but it would be good
to check this. I suggest setting work_mem = 64 and making a test case
that only just spills to disk. Maybe do a few thousand partitions
worth of that and see if you can measure any slowdown.

I copied your shell script and slightly modified it then ran pgbench
with (1 10 100 1000 5000 10000) window partitions (see attached shell
script). In the script I set work_mem to 64kB. It seems for 10000,
1000 and 100 partitions, the performance difference seems
noises. However, for 10, 2, 1 partitions. I see large performance
degradation with the patched version: patched is slower than stock
master in 1.5% (10 partitions), 41% (2 partitions) and 55.7% (1
partition). See the attached graph.

Attachments:

v1-0001-Enhance-tuplestore.txttext/plain; charset=us-asciiDownload
From 4749d2018f33e883c292eb904f3253d393a47c99 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Thu, 5 Sep 2024 20:59:01 +0900
Subject: [PATCH v1] Enhance tuplestore.

Let tuplestore_updatemax() handle both memory and disk case.
---
 src/backend/utils/sort/tuplestore.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 444c8e25c2..854121fc11 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -107,9 +107,10 @@ struct Tuplestorestate
 	bool		backward;		/* store extra length words in file? */
 	bool		interXact;		/* keep open through transactions? */
 	bool		truncated;		/* tuplestore_trim has removed tuples? */
+	bool		inMem;			/* true if maxSpace is for memory */
 	int64		availMem;		/* remaining memory available, in bytes */
 	int64		allowedMem;		/* total memory allowed, in bytes */
-	int64		maxSpace;		/* maximum space used in memory */
+	int64		maxSpace;		/* maximum space used in memory or disk */
 	int64		tuples;			/* number of tuples added */
 	BufFile    *myfile;			/* underlying file, or NULL if none */
 	MemoryContext context;		/* memory context for holding tuples */
@@ -262,6 +263,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 	state->eflags = eflags;
 	state->interXact = interXact;
 	state->truncated = false;
+	state->inMem = true;
 	state->allowedMem = maxKBytes * 1024L;
 	state->availMem = state->allowedMem;
 	state->maxSpace = 0;
@@ -1497,8 +1499,17 @@ static void
 tuplestore_updatemax(Tuplestorestate *state)
 {
 	if (state->status == TSS_INMEM)
+	{
 		state->maxSpace = Max(state->maxSpace,
 							  state->allowedMem - state->availMem);
+		state->inMem = true;
+	}
+	else
+	{
+		state->maxSpace = Max(state->maxSpace,
+							  BufFileSize(state->myfile));
+		state->inMem = false;
+	}
 }
 
 /*
@@ -1509,7 +1520,7 @@ tuplestore_updatemax(Tuplestorestate *state)
 const char *
 tuplestore_storage_type_name(Tuplestorestate *state)
 {
-	if (state->status == TSS_INMEM)
+	if (state->inMem)
 		return "Memory";
 	else
 		return "Disk";
@@ -1517,8 +1528,7 @@ tuplestore_storage_type_name(Tuplestorestate *state)
 
 /*
  * tuplestore_space_used
- *		Return the maximum space used in memory unless the tuplestore has spilled
- *		to disk, in which case, return the disk space used.
+ *		Return the maximum space used in memory or disk.
  */
 int64
 tuplestore_space_used(Tuplestorestate *state)
@@ -1526,10 +1536,7 @@ tuplestore_space_used(Tuplestorestate *state)
 	/* First, update the maxSpace field */
 	tuplestore_updatemax(state);
 
-	if (state->status == TSS_INMEM)
-		return state->maxSpace;
-	else
-		return BufFileSize(state->myfile);
+	return state->maxSpace;
 }
 
 /*
@@ -1601,7 +1608,9 @@ writetup_heap(Tuplestorestate *state, void *tup)
 	if (state->backward)		/* need trailing length word? */
 		BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
 
-	/* no need to call tuplestore_updatemax() when not in TSS_INMEM */
+	/* update maxSpace */
+	tuplestore_updatemax(state);
+
 	FREEMEM(state, GetMemoryChunkSpace(tuple));
 	heap_free_minimal_tuple(tuple);
 }
-- 
2.25.1

bench.shtext/plain; charset=us-asciiDownload
bench.pngimage/pngDownload
�PNG


IHDR^D���O�IDATx���x���o�!��P����i�)��w/E��RU�QA���B�RT:TzPZ������&$@�=�&����5;3{�?�0��2��6���������O��8�H�l���;�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x���X��s7�����7�����$%�k��`m-m�V�-��7��=��s��4{��{�w���OnN��W�7�bc��$s6����n��3���r�j���\{��~:J_�/��K�S����g����d�����M��.�����(E�y�����+)��-n��Mg���[�
z�����i ��; �!xH��G4��<��"��$H>��������K�������?�k����rDhI�lj��%m]YB�s6��~s�n�0
�n��E��p�^;�5�L{�z"*x��z{�rm;vY�r�S������+���Ay��-�~?��?��9�){���<�}�Z��i	�Xt��<@��\�����Y��f��zw`#��r����v�<G�+zi�����d�$���u/V�lw���W{&��%�����|����o����c���T@��K��b���~��*�<eQ�
�k�8�\3��3J��SZ1���N\��a)��Z'���=�-����:����	���i������sL��U�c��������h���{���������P]���_l�Q[U�:A�^J������Kw��o!5<S�()�������x��.��s?��X_c����_���f�����U�^o����jdH����UW���O�s|�^�d����R�j]4�>vA���^���=$#/��[u�n�N����yfj��J��#&���<=��u��i��6��<v��A]��W^��y�X��i��zj63��OY��*���?�V����w�%������pz�c-�YA�tLk���O�����_c�sZ���Z��G�N[�o��Q��i����/.���^����������c���������Z��e;g?���C��S
;���������t|�=���z������~A��4P�y���K�(�%m��?�����7�P�<1����w5�aoM�i�����:S��jh��
�{��1;P�i�;&�_��f�A��WV-7����/���UKz���~���G���<\F���q�tpZ+U��EO�RY���D�������K��v����m����Zg��vV����^����a��O����������7[�u{K-_����
����]������c��Pi�<��������kh���Z{���k� � xH����&�|=-���V@@���A�k��/o?H
/�I#:���l�6�s��{�k-
�}cI��y��>������_�y;M���~R����N�������bf5x��*��/�K�������+ ��|R������4��c*7t�7�vF.�0��}�*M��eo���{��=�e�7Vn���R��^�������6�mWC���!��W�����N���C��L������	������z����L�7I'�O��}�e�suNgl����(W�����S���!:�PE����s�K�D��;mw�	��<�&��,)�6}�R���~WdL}6](�N�����=�~����_�����j�3��yJ����we�����E�n ��?�����q�����L�����*��%i��T�]{���o����q8_��Fw-��1�3o���O������w���@�A���+[��z������7b�v��X*���
��M�j��5��?Q^^m��6�)TO
r��)��r��
�[[�KeU���]]�fsWD��E[#���
9b���T�r��r�6��?&9��1{}�b��HT-Yn���y��Y����W�j��:v���l6��!������Z�_�����jY���eJYM�|Y�������
b�����id	���[LD�;Vo��v�O��nu��~}�.�4�����J�,�[(���X���K�w����c�������(����JW�����Uw�C{{���C�b
���E����2v���=$/H�E~i��\�eI�B>���o!Z��),��&��������(E���;iOUY�k���*2f�fN��>|���rTW��'jt�b�.����)?���{t�����:���1�w�o*_�Ye�j����]�3l�^z������f��I����������z�f
;�s6_�Jy��Z���w��nw��}�B������[�o�_���#V�^��8������]��n�x�c�&]��}h�)u��}�^�k���qk�������>��<�<�:��)*2*����]�r2������y�!���2��~��[��J��G�|5O�+r����[�s]�����:��yD*t��������|n�����,u������^�}��.(������6#�����w����?*�hv��b��v�uJ'N�_�X����R�k?�������n_�_�]�;�Q
�����p��~������O����;��}��������D�a���R���������`��:���(e
�����+�W�>�_[�</����h��IG������u�.�(�"9����RE����':p�_e�qLn=u@��G������~���T�A	��x*]���6n�~�����vF��qk���Y��m�xX�*y��������)<]E����������P��a;sy�v4\�,i�.���7|��vD�V4VL�V�G��U��C�]�_�U\=��X�w����[~�Y��<N�w�:���`���w����Z����=��t�>�[?~��~�����7��������?u!Q}x�+���������� y����W:��
-�-�
�,���ET���>��s��BQ�J����)�������w���[@�����/����J��-=�Z7��%��^�����!F�m������&��������;)��/���JU1A���N���>��K��qY,��5�h����F�������}���������C*��g���r� 1��H��Iw�N��r�/���yj�FE����>��uRi��m���R����z�~[�Fu��9F������+zw����e?��>��u��U[�����L�,��W��+Kn{h����}�o]=�����# �=����?�|w�P���khk{��J�N�j���w������]��\���=$/��%��zv��NS��^��|�M��fR��&jm�Aj�c��3R���N��j�$RQW^�����w���Z��V�������oYk~�9z��M;@C�W[#�\�{@~Ul2BGt�z�[�=�0�u�:��j���.X���X
���{�^:��p5�]�/���w�T����J������F���|�kn����%�;m�}v��?i�:��^�gz+G�g4h�5<�Rk��FM����U�k���-�g����>�5��E�7�+���o�%l��M�w��r��^��UX���}��������o���&�Z��@��S?��XG*2�]��V����q��:t9��$�u�������5'�o��#xH������a��S��>Zg���]�����Xq��w~��\����f-M>r��S�\
<���M	��*�y�����1������H`q�*��R3QKBn��Wf�P����}��R���~f���������Z�l$��gB�{����u������}���T���g��}�{B���\���������~��������<���1�e�������k�����������j~���d�����0�����K	�����I�N������0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�����g}�m������r���i��*�,�.@2@����k�:��K�?�����4�g}�����\G��.@�G��M������������C��t@�W��G�Qm/g� �#x�Yb�(��n>�K-��{�/��������!x�"���U��5qC�^-yQ+'}�-�^ja�2����Po}��^u��>����=��Z�?���-�r�����:3<�_����� x]�������vW�E����W�Tq�
���(((�!��GU�1'�6�����O���r5�a��f>��C�P�g�����vnY[������bf����X����Q:�C�-bX�T��\�����H�^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^W\�����n���h�����zm�x�/����H^v��S����j�f�z���Ni�*=����~�����$u/��;u [U��u�y�j��u�.���#x�y�����?��?z�p��h��V������@r@���dh�w}��U�kb��<kh��F�`qve���]����r��F�u\-sI���R��o���wU���r�����l�^�`���W,�$���h\�\��������L����*��_E�J���
�����������?�'}/�+s�|��d�V�l���-
]�T�
�i��������Fif��z�B��X�T%����
�sve��W�,���\����H�^`�#x�a/0����ohG��>gWW��R�:��.������Ni����"�s���.�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0��l:9����X��7�Y�'4i�R��bqbm���,J��G��p��Mg�T���T/���#x���f��f��L���n�.<4G�I!?;���R���7vv���UG����������]x�vl�����U���(�H&^�Em�����lzC��*CB��%b�,}��BsJy&8?88X6�-���.
�uG�J;��[����j�3�	{_�9��[����Hx
g�A��!J�.�[��*������������� ���R�K��]D\~~�9�H�y����."���K�tm���O�^��N+d�?*��0�����E���K�L|�]	�d��u�G	
9��*$C/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a�k���+�h���r���^��c���Fg �#x������/ig��y~�����L��V��SV7g �#x�E��N_k�1��)�EJ�v���uvU���]�_hO������:/����j����T*g�  x����gu~�J{e�~���=S����F��oCU��������[�E�V�'2��"��E���_�XU&��y�f��ka���``@�!*��"nvN�9�H�'�}���"n�#�)�]\���'o~e;�3�3�=\Y�.����{mL�


2_$0+E����E���������7\:��"�*]��J�v��������e�Q��������:jV�����iZ��	���[xp������^U������.���S��K*I�p��5)Kt����9���#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��������������u���)]���R�����wg��������8>x����������z�Kg
�n�����T:�����e�kb�j���cM�VR~�W.���+r�~:�Js67V.��3R+S��j��rP���������u�� yp|����~��n
��0�/M
�y��SC�:|����\���N|VC�#�U�SR����P��h���������^k4�������/xV?�����SjH�YZ���\b��a.x��),eN��	���;T�UU��:�,��t�fl�p�����,pvq�$=���U��\��,�R�!z��F�PE�&x����w�����a�L�a���?��2�x%l������2���tv�=3�����������z�U_�vM��������&Y�7�S��=�.#���/*��������Z��J�����['����u1O��7i���M���0����{�zm��"����=�U�b��>��k7+���*���k�����QF]'NU����^u�m����6/�'��"���
.��g�����#~
�}���������%>���a����T/�Kz:p�������th��Z�a�~��t!�j�E���E��������cW�Jw�To7���n��
.��7jY/�*4�[�Y�+��&��]�?S��O*�xD�^�;4�I#��RKe3���I.OU�[+��7�������/�Sc+q�s��q�Zd��}}�f�R*� 90xs�R��y���IU����!�g��|�nZ�s�jy�}y�/5�Wgw}�f�Q�������$�'4�����Bw6��;��Ti��M?`.x��Q�W���n��d��*l����z����[�!u
k����)�ToN�N�}�]�d����3�H���w$�4�����*�xQ����Q-���a*2\�\���+�>n��K�gRt�8l�+3{���^-�1C���9KZ���\]�UW6��{��;���*�����Z�?Z�
���}pp�l���d\�`��p����
G;u���K����#
q@�k��3�~������Y�_j��"nvN�]�}�����]B<�S�W����EHH����pvp��������H�,�����F������6��)
��r�&�K������T�H��F��m���g�"��6&t=p
S7|+{�v) ��������+k������8���K�x�k��q�d?��9�p�	��|}}e?�t)�K�V���?G���e.xEl���4k�+*t��Q�J����l�.t�+b�X�|1R��MW�\��q�D��U_���n���e��g�y]�������.�;��	R���[��U3�g��2�i��_�WqW+@�d.xyU���^�3��+�F����p�B����^r��]WX2���s����.@rd����6�'}���Z�����E�|���7i�(1�.k��j���V�����4�N�<2G?�.&W;�����/��M~MY�M�����������������:$/��g��������&y�Vj�h�xt��FU������?��s)�-T;W������
�vx�<��J�G.��e�k��}:�l���BM}��}<:/�K'u`�~�UR��zZ)~_����/��_
����e;��=�i�oE�l���)��C�vlW-���2X��x��=}AAg;��x6�L���+UMJ��*:s�+r�k�)?���f?�'x������{��\A���<TQ����_g���>�� ���������x�^j�%/K�"m�&]>���}T��]!�jXEm+U�f[��'T����QK�H��z���F��WT���>
���C���_�L#��������?������v���(:��E���m0x�+�*^��+���5{u��N��_�f��*���H�6#��3���3�U�T�D��Qf�~^%��I�7����W������R�����C������w5��R������gk������N����:t�\��:�-q�I��/�_����k��unvKe��)���uZ�����'7d����4kK�������������$�0��X�?�����������1�)\}�G�G,��V������I�t�-{�W�l�R������y�Z���F����R]r���9j��*����?qR��~D���S�q_kD}?m�����^�Q��d
�;3�*��n�����Q��t������)���xL�����3�.k��g5�:B_u>�!��k�!xa�����|����x�V�
G�����J���R����P�������*�/��?����Q���i�/k���Yj��-}pd����L������MTb�z1�e��5Q�_�G���y�t�����e���K��tj:�5���j�O%t�>ww�v���#7k���:�����8ZAKh����j�.���������F������wOQ�7��������1�hQQ]�U���$�m�!�=�)3�"�h��9�kQ�_��~(;K{W��������U��yI@
U�QJ������������N�z�EW9�5[K�f/���������en��g(YR���W'�6��Z���w��y��O�T�>������]�%]-5��N1���Q_�NN�������-U��<U�jq]������	�l:�j�����f9c���v_�P3�Tr�q�m��/�9����>�Z�t�*���T^G��������r��"�j��e��9�Q����Z6�~u�N�>���k�w��s�W����q#77��l��cU���J�����i�.�9����J���+��0��J~>��O����K�����,��("w�[^i��Sg��&���LsO�Z������6�s����JY����u`C����S�fh����j��e�=W5U
�_����R��'�U%�~��?���U)���������U��S�(]9��K�u*<�J�v��f�G��l���d�MT����:��)'�����V�������Z��[�Km��/�(�����������D���������v�E �|�5c������^mU����G�����*���S�"��pvq=?B�]��UH�<J�F�}2����J�������k��K�0��=T�
^���<���wj��L5�����������W���W�T���)�������a6�I���V���E�v���b��B�W���ub�<mx���i��9�r�Yd;�I���M��O���_��,J_������f��\�����=�kL��Z��A]��f�������v�y�����
�����u������T��*_�1g�������gW�������TFg.\Jp^���o���
�sk���z����O��������by{
�ysI�UU��V��Q�o�~��/h����XXc��������{�h�P
h��^�\ISr�T��uT��hYc��o�j
��K��sWS�UT��:r1�:N���"k����U��e�WC�_[
�����F�C����{^�F���
si�yOe���>�WB{����M���W������Jk�s#�k�-�����o^}�/P�r���2�7g�����T����e����&��?�@r��3��ai�������nN���Z]~���������s
���������^�Q�m�����G��M�/�n]W
5��^�O4
�w��ec��p�Y���'�4����G�)-v��S�xc������u�Gqj�u����=�v�WV������o����������VH=�]|�\��o��;��8.^�T����.#��6�������+z���O�L�u1g��W�{��1nq�l�Z�3��e���f�:~@�d��W�g�n�����(;L[��}9gq|�r���5���������5F������\�#�K
�0.5��/$j�+*��]������Z��/
x���lK�Z�i��c�o���w��.��5���x
�"��Hx^��R���n���@��JJ�..o�'o�F����!��F����)x�
3�V�x�G��i�w�F:)��vm��Y��2:|�m�:��?	������#����J?o�^+��s�[*{�(M9<G��Z�lM=�!�����Y[�(�u7}�����Fo�( ��i������f��g&�����)���Fm|U���P�'N������~j2�k������Em�����^�Q��d
�;3�*�����o����;B;>��.���%oe�3@S&u���D�`���x�
�;��kc�@�I�%�����a3�V��YZ5*�����V(�~[sH��Z�>$B�k�k���X��B��/��G]&�������g���������#�2���S�f6Q�Q_�����^��.��U�w�f-�VgvU�G����U��gz���j�~�����^e�nd#���������S������z�:<vL3ZTT�qU��)���3����������m���J��Bk�R���+�uf��=�:���^�G�U�k�����K!�y�%um�k�[^��3�,��N��V�=�t���f�+'l����r''h�Q��������[��hy�b���8���s2����k[��j�3f�j��5sO%����$s����Z�O����Ke���S^o�����\�E���+��
�s<�6�����l�����}L�3�P�4�5�v�N���������l6%t�[:���<R���������8����h������)��=��]��W[u��y�I#�kS�S��r(�j0�\���������;��^�X�zj�����N/�*�sUS����eiZ+�Yy�ZU���Z����\e�r�}�����
�����$���:�Z%����W}���RZ��[�Km��/�(��[_���i~��.������=�]��)�W���W�F-8�UO�W>sk`�GI�(�OC�Z��_IyzZU1wM�uI����X���C��t6��Va]�����N��:�p�6<VI�d�����R����g���&M��7Y/>��~7��mQ��T���5���������Q]c��������f.yR�B;u>��Z��g�I�����-�U�znm�4T����2��r���8�^,�-��������*��E+k����.�=w5e_�A���#s���	z��M�����U��e�WC�_[
�����F�C��W��o�j
������[�������V�5}���<�~�c��Hd
��]3���W*X(���xyU����j|�2�<�:����?7Oa��2��������'�G��m��R�E����g�c�����
�u�UdQ��3��-	��XcM�h����`�k?���7�z�H��Zg�q�:�X�A�^���~K:�<���6U�o~��*��*�a����Z�_� �3p�+J�j�w�u����T5s��hD����7��]��pv��\����{au��;�6X�T����P�Je����vj���j��};��2s�x�y������\Rf�3^�U��T��5N�wn��C'u��J��T���cd����o�G��?����������k�~�&�Y�Sr7z���:
u�"��:������
y�e�\�M�^2���HF���}Z�����,�tI�����'�������V,qv5�s��vZ�wR��U�ugu��BO�Nu��z�{H���W*2�'UIuH+n�Xpp�l6[��,x�������h�N�rv	�9rD!�oG:�����k�����wu��Qg��I��N���Z��D�|���]N���8��C�]B<aa�����=Wu�����[HH����pvp��<W��[���?�b�l}���^u+�&���}�r��}�&����e}��m-��F����7-&t=pS7|+{�v)  @:��*���5��:�����s���.#I�\��T�����c��%�6�: M����:���s$W���}��~��]E\�{���?�t��U��������+&��}�}}}����U�U�ti�����s���\��(�PTu����^�1�cM�7�z;���`�����3`�3Z��j��e��������o�5������2	@e0x]����������U�U�Igu��7}T8��vOU�g����cU��\�b�W�9'�����mR���f��v��\���J����������+�%�����>�;_3=��J���]`����]G�����y{��s�+7������>=G?�.��/u1���^\�6mtv�s�+�M�&��,���1��TD�~�@{�N���?P/ck�b�3^Q���W@�X��S+�g�}<:�/��jS�-��D
*�Rj[�v���U?����1x�+���\��������tF�T�����d�xt�/���:*���M�6�.�[�/>>���T���a.xE���&�47K-���s�6�Wx�����Z���^Q;�w�AZ��sJcl%���/�R��y���IU���\��K
���]��Y�o��pf��~�����@U�4�fp)���{�{e���w�[z6zKp-�"�[���X��T�=��OKg���K��[O+���t�p�Kit��R�/�)���L�}+�g�f�H)��c�j�g���r�& 90�"�k��V��c
�j��}��J�0^���
��ml��R^j�%/K�"cY��:~�Ge9��b.xyUQ��C���y�	��7zj���:�x���2�Vp9����U���.�����k���%���������#*���#�@�����=���e�����������O��g-r��� y1������Z�p�������TS~��X�S(}�
��<B�E ��M5��[�F�������V�������'�y�M{C#d���a�}5d�������xd���Z���^��+���������,���U��!��W�:������m�������+�������o�l����"�����|�}��NFI��5W�=C�6�����#�\�����l/���&�s�z�yYMr���CE5�S^!�x����U������^�g~������o:�qn'�b6y�W��1?�Q��o�^�}��=��0POp�
���{��v^�������Vp*.��^`������9o'�p)���
���>u�J�u9>xy���[�k��>�e�Q:���#�����J�)����$��x�a/0�����0�F��^`����czo�xg����(�DS���Du*����H^vQ!��~���_��i���
|�#�\��r�9�:I������^���Z�J+w��r�j)�W��O�^���3wM��}�g��=���`E��� Z���/U�1������t��JcqvM���
����.]��6N���
+���*�us���`�l�x�\�`�����=p�v��)g���#G���v�a��.!�X�f�Np�Wuv	��t�}�t�i�e�K���.���+�^���uv	�����j��\�����"n�#�)�]\���78�M���S���j]#��|2�B���������*�������:&t=pS7|+{�v)  @:��*���5��:�����s���.#I�\��T�����c��%�6�: M����:���s$W���}��~��]E\�{���?�t��U��������+&lsv�����������t��*]�1���?���^����\
�?W���Uni�����������5<8��]�F�������jY���M�r�T�YcT����H^1�2����+�Mg 9"x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x]�=�����i��h�h��~��%R:�0�����w�����svhIEwm���^�P�W�Q7gW �#x��.�.�P���a��?]W9g���h�<0���[��z.��g�:�r�N>�M��@��������g����E-����z$�"���nzzD���������-,��o��x�������p�S�N9��x�9���#��tv	I��5ku��k��:x���K����{�COk-�^�\�t���\��'�t��K�',��V�����.\� gq���	O��2�"\���I�:6�����a�?VP���+F```�i1�+((��+���[�@���H�agWW��Y����H����f�pvIB�*��b��]F�/����iz�����;�w�#�����c?�;��*�b�K�t���#��".?��.��]1a��+��������]E\�K�V���?G���E����B/8����m���E��M�K�����\�W��>���&�\��Y���C��E����uH�^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^�E���m�i����Y��Y�]�d���zH��n�/TWq���]
�d��uE
=>h�Z����^���-�����������0��=��f�7}�������{�6���S�.!�#G�(���H'�"�]B��v�Z�>�Z��8���9�������Z���(�/]r�7W���	=���	;��.����.���E�"$$DG�S8��W{opi�����������n{��o�a��#H��]E\Y�fUV��#:~N����e$	��T��Urvq,��D��Z��]p��O����u���������������}/q�����8�����R���w��m�� ___{zvvq�.]Z�k;f���~8�#x���MW����2��(���<)�������X=��9�<I����������2$S/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x�a�+�:��Mu�����(S�.?c��gqsva������l���L���K�U��apC=����|�R�-��@RG��;�j�V��I�����*��M��}���[���������v�W�|�����-K�����_�GKE��[�$��%�.^�$��>�qU��G)��u�����H&^���2U
]:�1�f]�R(�o�x�f��,X��U�*��&�
>p;�t��S+�TtvqD{��������6Mo�Zcc��:��x.��W��5~U3G�����y���{��]FVwO]b�K�`s�:��8������\�w�����������V/y*��e�q��(���7}K�^rW��yuq�n���Pf{�����]��f����{uL�


zX�������K�����K������s��|k4W�~#4���5��U?���v������@r@����7�����c�"J����������7n���u�EkV����]�d��������|R��%m�_���%m�_���%m�_�G�z@
4pv	�O�]���%]�]���%]�]���%}/0�����0�F�zD���m�i����Y���|A�UG��N}>���e��E�gU�,nw��{����-O^��#���8l,3����������������h�J4��?P�)�]+��p=���zH��n�/TWq��nL����>����)���6n�g�����Z*�d��6O����������1�82^���1s��K���X�NQ��[Tc�-���MC�R����>�CVv}��WD�J�R��A�����v���Us��XwM��^���J/vS�b�k�����y��������������G��aq�X2f�v�c�x� ����a
5��A��*�t]���W��E�J�WD�J��2�h	�/���v�W�|����__-i�*��~�:�b	����~�=L�i���U�/�s���x=4K����y,/W������v�Y��\�����2GHIC��!�����H����K�N��W�X|��;\.Zo;��9��3$~������I�$��#��L�7���+��X2^�������g����E-�,���I�L�B�����a��~Pp)�����v^�4]����Y|y�2����x9���%c���4�������/����7o�����$�#x=����`^]\�[�l5��~,`;�C�.�S��^���_��I_�J����x�2����x9���%c���4���k�����zz���-�XAY'f�Q�5��N����^�����P;�TM����_����'~zL[0����x9���%c�����z��0���_<���(�W2g;6]
����������P����"��x�:v/���J��<�O>i|����M���{����j�a�#��p8r,3�������)t��Z�w������'{�����5���n�.V�l�������Y�t��s]��m��������v���u:�a�#�s�X�y�o,�Z,
���������!�b^`�#x�a/0�����0�F��^`�#x�a/0�����0�F��^`�#x������)��-��Sn1#�k@��*��u��<P������*���vJ����CS�4���
����j�`5�`=�e�T������
���g�������*���*�����Y#C���3������6�L�28(�8��f����z.n����h��q
�v��������4t�Xm�ym��iOWU���5�>��.����-��!+����Jk^����;������R)BK����jv��Td��:�S���@���U`�V��[[���ks��y�+��d;�V���Rk����i��CU���~z���ZzF^��e��Q��tS�����{R��k��>��W�1�X�j�]5d�E�z\Td�N�8��Jz��g}�����������+��b�������C���w��v_���_��Z1g�"vhf����C��>/�2�h�����:����T�5��|�U��h�Wg�9����:�D�L��j���\E�����	��R�\�����P�<����~�8UI#��T�K��R�aZw�G���>�'/�VJg�$K/p5V7|a�k��������x����{�Tn����@s���3�k�O-�^X���V�N��,��)�z��e:��Jh���T��X�������NY�������z
.����U���j�����K��uu�������;>n_�����Gm����������P]�k/pwW���*;w������z��?�_���](7H?|T[YlkD�����i��w������]n����M���\z��6�h�6u�=���xeUo[[�+���Y�j�^MwAs��������qZT|���_Y��[��'Gt�*�rs��]/pA���������=��R�^�Q��J����J��*Wq_��2�Ux��t5xy����c���U*�}kvh��
:��f=[��+�X�K�~�]��|�*��>�%�����w����r_yI���
�������R�<KTS��1���D�����.�����U�V�����k�����9{��Nh�wc��;���G�
z�J����W��]R�ZO+w�.
<�TA����Se't��.��4O��7>��W>��n7��f�Zc��k�En�?�u����������R���-��]Y[M������1D���='���.��v���%�����mq�yn���5`G���Z����������$b�w�677�Z�g��pkm]�T��P��j��_���>��\�%�ZA3�O��*3)��R���){�J�S�~?+[�{k6j�/Zw�����-w)�"*P��.�6_[/�S���2�-m�0T]l�]y+������`t�u���v�D��w|S���ZkC�������(l_������/w>ep�.�9G?n����h��,QQ���47��Cm	53u���hU���[��*�Dv�S|�BBm*��unnH�^����t��mg��������v����Z7���r*C����!�8� �UnSjK�f�~������	��(U�!�����Q���RD*:w{�k�#mI���O�������������iR�l��$����G��6�'����w'M|/�}y�
���b�;�������z�O!�������/rTR��j70�.g�gZ��|4����R����{�]�
���=k�c7?�����a���p����/h���S�T����������w�f��1��������~������\{��2C���������ZsF�y{��q������z��eQ���j�PSi�������lIW]>���a�;��?:�����z��OM��������������V��j���=��:v�_���>_�#���!^`�`����t�IgW����0�F��^`�#x�a/0�����0�F��^`�#x�a/0�������.�����pIEND�B`�
#16David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#15)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, 6 Sept 2024 at 16:21, Tatsuo Ishii <ishii@postgresql.org> wrote:

However, for 10, 2, 1 partitions. I see large performance
degradation with the patched version: patched is slower than stock
master in 1.5% (10 partitions), 41% (2 partitions) and 55.7% (1
partition). See the attached graph.

Thanks for making the adjustments to this.

I don't think there is any need to call tuplestore_updatemax() from
within writetup_heap(). That means having to update the maximum space
used every time a tuple is written to disk. That's a fairly massive
overhead.

Instead, it should be fine to modify tuplestore_updatemax() to set a
flag to true if state->status != TSS_INMEM and then record the disk
space used. That flag won't ever be set to false again.
tuplestore_storage_type_name() should just return "Disk" if the new
disk flag is set, even if state->status == TSS_INMEM. Since the
work_mem size won't change between tuplestore_clear() calls, if we've
once spilt to disk, then we shouldn't care about the memory used for
runs that didn't. Those will always have used less memory.

I did this quickly, but playing around with the attached, I didn't see
any slowdown.

Here's the results I got on my Zen2 AMD machine:

parts master yours mine mine_v_master
10000 5.01 5.12 5.09 99%
1000 4.30 4.25 4.24 101%
100 4.17 4.13 4.12 101%
10 4.16 4.12 4.10 101%
2 4.75 7.64 4.73 100%
1 4.75 8.57 4.73 100%

David

Attachments:

tuplestore_track_max_disk_space_used.patch.txttext/plain; charset=US-ASCII; name=tuplestore_track_max_disk_space_used.patch.txtDownload
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 444c8e25c2..03d745c609 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -107,9 +107,10 @@ struct Tuplestorestate
 	bool		backward;		/* store extra length words in file? */
 	bool		interXact;		/* keep open through transactions? */
 	bool		truncated;		/* tuplestore_trim has removed tuples? */
+	bool		usedDisk;		/* true if tuple store has ever gone to disk */
 	int64		availMem;		/* remaining memory available, in bytes */
 	int64		allowedMem;		/* total memory allowed, in bytes */
-	int64		maxSpace;		/* maximum space used in memory */
+	int64		maxSpace;		/* maximum space used in memory or disk */
 	int64		tuples;			/* number of tuples added */
 	BufFile    *myfile;			/* underlying file, or NULL if none */
 	MemoryContext context;		/* memory context for holding tuples */
@@ -262,6 +263,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 	state->eflags = eflags;
 	state->interXact = interXact;
 	state->truncated = false;
+	state->usedDisk = false;
 	state->allowedMem = maxKBytes * 1024L;
 	state->availMem = state->allowedMem;
 	state->maxSpace = 0;
@@ -1499,17 +1501,25 @@ tuplestore_updatemax(Tuplestorestate *state)
 	if (state->status == TSS_INMEM)
 		state->maxSpace = Max(state->maxSpace,
 							  state->allowedMem - state->availMem);
+	else
+	{
+		state->maxSpace = Max(state->maxSpace,
+							  BufFileSize(state->myfile));
+		state->usedDisk = true;
+	}
 }
 
 /*
  * tuplestore_storage_type_name
  *		Return a string description of the storage method used to store the
- *		tuples.
+ *		tuples.  We don't use the current status as if the tuplestore went to
+ *		disk and a tuplestore_clear() allowed it to switch back to memory
+ *		again, we still want to know that it has spilled to disk.
  */
 const char *
 tuplestore_storage_type_name(Tuplestorestate *state)
 {
-	if (state->status == TSS_INMEM)
+	if (!state->usedDisk)
 		return "Memory";
 	else
 		return "Disk";
@@ -1517,8 +1527,7 @@ tuplestore_storage_type_name(Tuplestorestate *state)
 
 /*
  * tuplestore_space_used
- *		Return the maximum space used in memory unless the tuplestore has spilled
- *		to disk, in which case, return the disk space used.
+ *		Return the maximum space used in memory or disk.
  */
 int64
 tuplestore_space_used(Tuplestorestate *state)
@@ -1526,10 +1535,7 @@ tuplestore_space_used(Tuplestorestate *state)
 	/* First, update the maxSpace field */
 	tuplestore_updatemax(state);
 
-	if (state->status == TSS_INMEM)
-		return state->maxSpace;
-	else
-		return BufFileSize(state->myfile);
+	return state->maxSpace;
 }
 
 /*
@@ -1601,7 +1607,6 @@ writetup_heap(Tuplestorestate *state, void *tup)
 	if (state->backward)		/* need trailing length word? */
 		BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
 
-	/* no need to call tuplestore_updatemax() when not in TSS_INMEM */
 	FREEMEM(state, GetMemoryChunkSpace(tuple));
 	heap_free_minimal_tuple(tuple);
 }
#17Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#16)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Thanks for making the adjustments to this.

I don't think there is any need to call tuplestore_updatemax() from
within writetup_heap(). That means having to update the maximum space
used every time a tuple is written to disk. That's a fairly massive
overhead.

Instead, it should be fine to modify tuplestore_updatemax() to set a
flag to true if state->status != TSS_INMEM and then record the disk
space used. That flag won't ever be set to false again.
tuplestore_storage_type_name() should just return "Disk" if the new
disk flag is set, even if state->status == TSS_INMEM. Since the
work_mem size won't change between tuplestore_clear() calls, if we've
once spilt to disk, then we shouldn't care about the memory used for
runs that didn't. Those will always have used less memory.

I did this quickly, but playing around with the attached, I didn't see
any slowdown.

Your patch looks good to me and I confirmed that with your patch I
didn't see any slowdown either. Thanks!

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#18Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Tatsuo Ishii (#17)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, Sep 6, 2024 at 11:32 AM Tatsuo Ishii <ishii@postgresql.org> wrote:

Thanks for making the adjustments to this.

I don't think there is any need to call tuplestore_updatemax() from
within writetup_heap(). That means having to update the maximum space
used every time a tuple is written to disk. That's a fairly massive
overhead.

Instead, it should be fine to modify tuplestore_updatemax() to set a
flag to true if state->status != TSS_INMEM and then record the disk
space used. That flag won't ever be set to false again.
tuplestore_storage_type_name() should just return "Disk" if the new
disk flag is set, even if state->status == TSS_INMEM. Since the
work_mem size won't change between tuplestore_clear() calls, if we've
once spilt to disk, then we shouldn't care about the memory used for
runs that didn't. Those will always have used less memory.

I did this quickly, but playing around with the attached, I didn't see
any slowdown.

Your patch looks good to me and I confirmed that with your patch I
didn't see any slowdown either. Thanks!

The changes look better. A nitpick though. With their definitions
changed, I think it's better to change the names of the functions
since their purpose has changed. Right now they report the storage
type and size used, respectively, at the time of calling the function.
With this patch, they report maximum space ever used and the storage
corresponding to the maximum space. tuplestore_space_used() may be
changed to tuplestore_maxspace_used(). I am having difficulty with
tuplestore_storage_type_name(); tuplestore_largest_storage_type_name()
seems mouthful and yet not doing justice to the functionality. It
might be better to just have one funciton tuplestore_maxspace_used()
which returns both the maximum space used as well as the storage type
when maximum space was used.

The comments need a bit of grammar fixes, but that can be done when
finalizing the patches.

--
Best Wishes,
Ashutosh Bapat

#19Tatsuo Ishii
ishii@postgresql.org
In reply to: Ashutosh Bapat (#18)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

The changes look better. A nitpick though. With their definitions
changed, I think it's better to change the names of the functions
since their purpose has changed. Right now they report the storage
type and size used, respectively, at the time of calling the function.
With this patch, they report maximum space ever used and the storage
corresponding to the maximum space. tuplestore_space_used() may be
changed to tuplestore_maxspace_used(). I am having difficulty with
tuplestore_storage_type_name(); tuplestore_largest_storage_type_name()
seems mouthful and yet not doing justice to the functionality. It
might be better to just have one funciton tuplestore_maxspace_used()
which returns both the maximum space used as well as the storage type
when maximum space was used.

+1. Returning the storage type by the same function, not by a separate
function looks more natural.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#20David Rowley
dgrowleyml@gmail.com
In reply to: Ashutosh Bapat (#18)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat
<ashutosh.bapat.oss@gmail.com> wrote:

The changes look better. A nitpick though. With their definitions
changed, I think it's better to change the names of the functions
since their purpose has changed. Right now they report the storage
type and size used, respectively, at the time of calling the function.
With this patch, they report maximum space ever used and the storage
corresponding to the maximum space. tuplestore_space_used() may be
changed to tuplestore_maxspace_used(). I am having difficulty with
tuplestore_storage_type_name(); tuplestore_largest_storage_type_name()
seems mouthful and yet not doing justice to the functionality. It
might be better to just have one funciton tuplestore_maxspace_used()
which returns both the maximum space used as well as the storage type
when maximum space was used.

How about just removing tuplestore_storage_type_name() and
tuplestore_space_used() and adding tuplestore_get_stats(). I did take
some inspiration from tuplesort.c for this, so maybe we can defer back
there for further guidance. I'm not so sure it's worth having a stats
struct type like tuplesort.c has. All we need is a char ** and an
int64 * output parameter to pass to the stats function. I don't think
we need to copy the tuplesort_method_name(). It seems fine just to
point the output parameter of the stats function at the statically
allocated constant.

David

#21Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: David Rowley (#20)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, Sep 6, 2024 at 7:21 PM David Rowley <dgrowleyml@gmail.com> wrote:

On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat
<ashutosh.bapat.oss@gmail.com> wrote:

The changes look better. A nitpick though. With their definitions
changed, I think it's better to change the names of the functions
since their purpose has changed. Right now they report the storage
type and size used, respectively, at the time of calling the function.
With this patch, they report maximum space ever used and the storage
corresponding to the maximum space. tuplestore_space_used() may be
changed to tuplestore_maxspace_used(). I am having difficulty with
tuplestore_storage_type_name(); tuplestore_largest_storage_type_name()
seems mouthful and yet not doing justice to the functionality. It
might be better to just have one funciton tuplestore_maxspace_used()
which returns both the maximum space used as well as the storage type
when maximum space was used.

How about just removing tuplestore_storage_type_name() and
tuplestore_space_used() and adding tuplestore_get_stats(). I did take
some inspiration from tuplesort.c for this, so maybe we can defer back
there for further guidance. I'm not so sure it's worth having a stats
struct type like tuplesort.c has. All we need is a char ** and an
int64 * output parameter to pass to the stats function. I don't think
we need to copy the tuplesort_method_name(). It seems fine just to
point the output parameter of the stats function at the statically
allocated constant.

tuplestore_get_stats() similar to tuplesort_get_stats() looks fine. In
future the stats reported by this function might expand e.g. maximum
number of readers may be included in the stats. If it expands beyond
two values, we could think of a separate structure, but for now it
looks fine given its limited use. A comment explaining why we aren't
using a stats structure and some guidance on when that would be
appropriate will be better.

--
Best Wishes,
Ashutosh Bapat

#22Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#20)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat
<ashutosh.bapat.oss@gmail.com> wrote:

The changes look better. A nitpick though. With their definitions
changed, I think it's better to change the names of the functions
since their purpose has changed. Right now they report the storage
type and size used, respectively, at the time of calling the function.
With this patch, they report maximum space ever used and the storage
corresponding to the maximum space. tuplestore_space_used() may be
changed to tuplestore_maxspace_used(). I am having difficulty with
tuplestore_storage_type_name(); tuplestore_largest_storage_type_name()
seems mouthful and yet not doing justice to the functionality. It
might be better to just have one funciton tuplestore_maxspace_used()
which returns both the maximum space used as well as the storage type
when maximum space was used.

How about just removing tuplestore_storage_type_name() and
tuplestore_space_used() and adding tuplestore_get_stats(). I did take
some inspiration from tuplesort.c for this, so maybe we can defer back
there for further guidance. I'm not so sure it's worth having a stats
struct type like tuplesort.c has. All we need is a char ** and an
int64 * output parameter to pass to the stats function. I don't think
we need to copy the tuplesort_method_name(). It seems fine just to
point the output parameter of the stats function at the statically
allocated constant.

Are you going to push the changes to tuplestore.c anytime soon? I
would like to rebase my patch[1]/messages/by-id/CAApHDvoY8cibGcicLV0fNh=9JVx9PANcWvhkdjBnDCc9Quqytg@mail.gmail.com -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp but the patch could be affected by
the tuplestore API change.

Best reagards,

[1]: /messages/by-id/CAApHDvoY8cibGcicLV0fNh=9JVx9PANcWvhkdjBnDCc9Quqytg@mail.gmail.com -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#23David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#22)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 12 Sept 2024 at 14:04, Tatsuo Ishii <ishii@postgresql.org> wrote:

Are you going to push the changes to tuplestore.c anytime soon? I
would like to rebase my patch[1] but the patch could be affected by
the tuplestore API change.

Ok, I'll look at that. I had thought you were taking care of writing the patch.

David

#24Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#23)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 12 Sept 2024 at 14:04, Tatsuo Ishii <ishii@postgresql.org> wrote:

Are you going to push the changes to tuplestore.c anytime soon? I
would like to rebase my patch[1] but the patch could be affected by
the tuplestore API change.

Ok, I'll look at that.

Thanks.

I had thought you were taking care of writing the patch.

Sorry, I should have asked you first if you are going to write the API
change patch.
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#25David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#24)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 12 Sept 2024 at 14:42, Tatsuo Ishii <ishii@postgresql.org> wrote:

Sorry, I should have asked you first if you are going to write the API
change patch.

I pushed a patch to change the API.

David

#26Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#25)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

I pushed a patch to change the API.

Thank you!
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#27Tatsuo Ishii
ishii@postgresql.org
In reply to: Tatsuo Ishii (#26)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Here is the v3 patch. This time I only include patch for the Window
Aggregate node. Patches for other node types will come after this
patch getting committed or come close to commitable state.

David,
In this patch I refactored show_material_info. I divided it into
show_material_info and show_storage_info so that the latter can be
used by other node types including window aggregate node. What do you
think?

I also added a test case in explain.sql per discussion with Maxim
Orlov.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v3-0001-Add-memory-disk-usage-for-Window-aggregate-nodes-.patchtext/x-patch; charset=us-asciiDownload
From 74ab7724bfdb49dc111eca5d96abed9cd29abdc7 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Thu, 12 Sep 2024 16:15:43 +0900
Subject: [PATCH v3] Add memory/disk usage for Window aggregate nodes in
 EXPLAIN.

This commit is similar to 1eff8279d and expands the idea to Window
aggregate nodes so that users can know how much memory or disk the
tuplestore used.

This commit uses newly introduced tuplestore_get_stats() to query this
information and add some additional output in EXPLAIN ANALYZE to
display the information for the Window aggregate node.
---
 src/backend/commands/explain.c        | 31 +++++++++++++++++++++++----
 src/test/regress/expected/explain.out | 11 ++++++++++
 src/test/regress/sql/explain.sql      |  3 +++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 2819e479f8..ce6792be8a 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -126,7 +126,9 @@ static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
+static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
+static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
+			show_windowagg_info(castNode(WindowAggState, planstate), es);
 			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
@@ -3343,13 +3346,11 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 }
 
 /*
- * Show information on material node, storage method and maximum memory/disk
- * space used.
+ * Show information on storage method and maximum memory/disk space used.
  */
 static void
-show_material_info(MaterialState *mstate, ExplainState *es)
+show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
 {
-	Tuplestorestate *tupstore = mstate->tuplestorestate;
 	char	   *maxStorageType;
 	int64		maxSpaceUsed,
 				maxSpaceUsedKB;
@@ -3379,6 +3380,28 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	}
 }
 
+/*
+ * Show information on material node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_material_info(MaterialState *mstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = mstate->tuplestorestate;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on WindowAgg node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_windowagg_info(WindowAggState *winstate, ExplainState *es)
+{
+	show_storage_info(winstate->buffer, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 6585c6a69e..d27fbdfebb 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -691,3 +691,14 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem
  Execution Time: N.N ms
 (4 rows)
 
+-- Test tuplestore storage usage in Window aggregate
+select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,100) a(n)');
+                                                 explain_filter                                                 
+----------------------------------------------------------------------------------------------------------------
+ WindowAgg  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+   ->  Function Scan on generate_series a  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(5 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index c7055f850c..50eaf554eb 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -169,3 +169,6 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec
 select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8');
 -- this tests an edge case where we have no data to return
 select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8');
+
+-- Test tuplestore storage usage in Window aggregate
+select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,100) a(n)');
-- 
2.25.1

#28David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#27)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, 13 Sept 2024 at 00:12, Tatsuo Ishii <ishii@postgresql.org> wrote:

In this patch I refactored show_material_info. I divided it into
show_material_info and show_storage_info so that the latter can be
used by other node types including window aggregate node. What do you
think?

Yes, I think it's a good idea to move that into a helper function. If
you do the other node types, without that helper the could would have
to be repeated quite a few times. Maybe show_storage_info() can be
moved up with the other helper functions, say below
show_sortorder_options() ? It might be a good idea to keep the "if
(!es->analyze || tupstore == NULL)" checks in the calling function
rather than the helper too.

I thought about the location of the test for a while and read the
"This file is concerned with testing EXPLAIN in its own right."
comment at the top of that explain.out. I was trying to decide if
testing output of a specific node type met this or not. I can't pick
out any other tests there which are specific to a node type, so I'm
unsure if this is the location for it or not. However, to put it
anywhere else means having to add a plpgsql function to mask out the
unstable parts of EXPLAIN, so maybe the location is good as it saves
from having to do that. I'm 50/50 on this, so I'm happy to let you
decide. You could also shrink that 100 rows into a smaller number for
the generate_series without losing any coverage.

Aside from that, I think the patch is good. Thanks for working on it.

David

#29Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#28)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

David,

Thank you for your review.

Yes, I think it's a good idea to move that into a helper function. If
you do the other node types, without that helper the could would have
to be repeated quite a few times. Maybe show_storage_info() can be
moved up with the other helper functions, say below
show_sortorder_options() ?

Yeah, that makes sense. Looks less random.

It might be a good idea to keep the "if
(!es->analyze || tupstore == NULL)" checks in the calling function
rather than the helper too.

I agree with this. This kind of check should be done in the calling
function.

I thought about the location of the test for a while and read the
"This file is concerned with testing EXPLAIN in its own right."
comment at the top of that explain.out. I was trying to decide if
testing output of a specific node type met this or not. I can't pick
out any other tests there which are specific to a node type, so I'm
unsure if this is the location for it or not. However, to put it
anywhere else means having to add a plpgsql function to mask out the
unstable parts of EXPLAIN, so maybe the location is good as it saves
from having to do that. I'm 50/50 on this, so I'm happy to let you
decide.

Yeah. Maybe we should move the function to elsewhere so that it can be
shared by other tests. However in this case it's purpose is testing an
additional output in an explain command. I think this is not far from
"This file is concerned with testing EXPLAIN in its own right.". So I
would like to keep the test in explain.sql.

You could also shrink that 100 rows into a smaller number for
the generate_series without losing any coverage.

Right. I will make the change.

Aside from that, I think the patch is good. Thanks for working on it.

Thanks. Attached is the v4 patch. I am going push it if there's no
objection.

After this, I will work on remaining node types.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v4-0001-Add-memory-disk-usage-for-Window-aggregate-nodes-.patchtext/x-patch; charset=us-asciiDownload
From e71e697f4ec399bc19af6f1aeac614185acc38c7 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Fri, 13 Sep 2024 14:51:45 +0900
Subject: [PATCH v4] Add memory/disk usage for Window aggregate nodes in 
 EXPLAIN.

This commit is similar to 1eff8279d and expands the idea to Window
aggregate nodes so that users can know how much memory or disk the
tuplestore used.

This commit uses newly introduced tuplestore_get_stats() to query this
information and add some additional output in EXPLAIN ANALYZE to
display the information for the Window aggregate node.
---
 src/backend/commands/explain.c        | 68 ++++++++++++++++++++-------
 src/test/regress/expected/explain.out | 11 +++++
 src/test/regress/sql/explain.sql      |  3 ++
 3 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 2819e479f8..aaec439892 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -120,6 +120,7 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
 								 List *ancestors, ExplainState *es);
 static void show_sortorder_options(StringInfo buf, Node *sortexpr,
 								   Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
@@ -127,6 +128,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
+static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
+			show_windowagg_info(castNode(WindowAggState, planstate), es);
 			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
@@ -2894,6 +2897,34 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
 	}
 }
 
+/*
+ * Show information on storage method and maximum memory/disk space used.
+ */
+static void
+show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
+{
+	char	   *maxStorageType;
+	int64		maxSpaceUsed,
+				maxSpaceUsedKB;
+
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", maxStorageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 maxStorageType,
+						 maxSpaceUsedKB);
+	}
+}
+
 /*
  * Show TABLESAMPLE properties
  */
@@ -3350,9 +3381,6 @@ static void
 show_material_info(MaterialState *mstate, ExplainState *es)
 {
 	Tuplestorestate *tupstore = mstate->tuplestorestate;
-	char	   *maxStorageType;
-	int64		maxSpaceUsed,
-				maxSpaceUsedKB;
 
 	/*
 	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
@@ -3361,22 +3389,26 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	if (!es->analyze || tupstore == NULL)
 		return;
 
-	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
-	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+	show_storage_info(tupstore, es);
+}
 
-	if (es->format != EXPLAIN_FORMAT_TEXT)
-	{
-		ExplainPropertyText("Storage", maxStorageType, es);
-		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
-	}
-	else
-	{
-		ExplainIndentText(es);
-		appendStringInfo(es->str,
-						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
-						 maxStorageType,
-						 maxSpaceUsedKB);
-	}
+/*
+ * Show information on WindowAgg node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_windowagg_info(WindowAggState *winstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = winstate->buffer;
+
+	/*
+	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
+	 * get as far as creating the tuplestore.
+	 */
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
 }
 
 /*
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 6585c6a69e..88355b6a99 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -691,3 +691,14 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem
  Execution Time: N.N ms
 (4 rows)
 
+-- Test tuplestore storage usage in Window aggregate
+select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,10) a(n)');
+                                                 explain_filter                                                 
+----------------------------------------------------------------------------------------------------------------
+ WindowAgg  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+   ->  Function Scan on generate_series a  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(5 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index c7055f850c..3f11b559aa 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -169,3 +169,6 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec
 select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8');
 -- this tests an edge case where we have no data to return
 select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8');
+
+-- Test tuplestore storage usage in Window aggregate
+select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,10) a(n)');
-- 
2.25.1

#30Maxim Orlov
orlovmg@gmail.com
In reply to: Tatsuo Ishii (#29)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, 13 Sept 2024 at 09:11, Tatsuo Ishii <ishii@postgresql.org> wrote:

Thanks. Attached is the v4 patch. I am going push it if there's no
objection.

Looks good to me. Thank you for your work.

--
Best regards,
Maxim Orlov.

#31Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Tatsuo Ishii (#29)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

The patch looks fine but it doesn't add a test case where Storage is
Disk or the case when the last usage fit in memory but an earlier
usage spilled to disk. Do we want to cover those. This test would be
the only one where those code paths could be tested.

On Fri, Sep 13, 2024 at 11:41 AM Tatsuo Ishii <ishii@postgresql.org> wrote:

David,

Thank you for your review.

Yes, I think it's a good idea to move that into a helper function. If
you do the other node types, without that helper the could would have
to be repeated quite a few times. Maybe show_storage_info() can be
moved up with the other helper functions, say below
show_sortorder_options() ?

Yeah, that makes sense. Looks less random.

It might be a good idea to keep the "if
(!es->analyze || tupstore == NULL)" checks in the calling function
rather than the helper too.

I agree with this. This kind of check should be done in the calling
function.

I thought about the location of the test for a while and read the
"This file is concerned with testing EXPLAIN in its own right."
comment at the top of that explain.out. I was trying to decide if
testing output of a specific node type met this or not. I can't pick
out any other tests there which are specific to a node type, so I'm
unsure if this is the location for it or not. However, to put it
anywhere else means having to add a plpgsql function to mask out the
unstable parts of EXPLAIN, so maybe the location is good as it saves
from having to do that. I'm 50/50 on this, so I'm happy to let you
decide.

Yeah. Maybe we should move the function to elsewhere so that it can be
shared by other tests. However in this case it's purpose is testing an
additional output in an explain command. I think this is not far from
"This file is concerned with testing EXPLAIN in its own right.". So I
would like to keep the test in explain.sql.

You could also shrink that 100 rows into a smaller number for
the generate_series without losing any coverage.

Right. I will make the change.

Aside from that, I think the patch is good. Thanks for working on it.

Thanks. Attached is the v4 patch. I am going push it if there's no
objection.

After this, I will work on remaining node types.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

--
Best Wishes,
Ashutosh Bapat

#32Tatsuo Ishii
ishii@postgresql.org
In reply to: Ashutosh Bapat (#31)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

The patch looks fine but it doesn't add a test case where Storage is
Disk

We can test the case by setting work_mem to the minimum size (64kB)
and giving slightly larger "stop" parameter to generate_series.

or the case when the last usage fit in memory but an earlier
usage spilled to disk.

In my understanding once tuplestore changes the storage type to disk,
it never returns to the memory storage type in terms of
tuplestore_get_stats. i.e. once state->usedDisk is set to true, it
never goes back to false. So the test case is not necessary.
David, am I correct?

Do we want to cover those. This test would be
the only one where those code paths could be tested.

I am fine to add the first test case.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#33Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Tatsuo Ishii (#32)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Fri, Sep 13, 2024 at 3:02 PM Tatsuo Ishii <ishii@postgresql.org> wrote:

The patch looks fine but it doesn't add a test case where Storage is
Disk

We can test the case by setting work_mem to the minimum size (64kB)
and giving slightly larger "stop" parameter to generate_series.

WFM

or the case when the last usage fit in memory but an earlier
usage spilled to disk.

In my understanding once tuplestore changes the storage type to disk,
it never returns to the memory storage type in terms of
tuplestore_get_stats. i.e. once state->usedDisk is set to true, it
never goes back to false. So the test case is not necessary.
David, am I correct?

I understand that. I am requesting a testcase to test that same logic.
--
Best Wishes,
Ashutosh Bapat

#34Tatsuo Ishii
ishii@postgresql.org
In reply to: Ashutosh Bapat (#33)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

or the case when the last usage fit in memory but an earlier
usage spilled to disk.

In my understanding once tuplestore changes the storage type to disk,
it never returns to the memory storage type in terms of
tuplestore_get_stats. i.e. once state->usedDisk is set to true, it
never goes back to false. So the test case is not necessary.
David, am I correct?

I understand that. I am requesting a testcase to test that same logic.

Maybe something like this? In the example below there are 2
partitions. the first one has 1998 rows and the second one has 2
rows. Assuming that work_mem is 64kB, the first one does not fit the
memory and spills to disk. The second partition fits memory. However as
state->usedDisk remains true, explain shows "Storage: Disk".

test=# explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n));
n QUERY PLAN

--------------------------------------------------------------------------------
-------------
WindowAgg (actual time=1.958..473328.589 rows=2000 loops=1)
Storage: Disk Maximum Storage: 65kB
-> Sort (actual time=1.008..1.277 rows=2000 loops=1)
Sort Key: ((a.n < 3))
Sort Method: external merge Disk: 48kB
-> Function Scan on generate_series a (actual time=0.300..0.633 rows=2
000 loops=1)
Planning Time: 0.069 ms
Execution Time: 474515.476 ms
(8 rows)

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#35Ashutosh Bapat
ashutosh.bapat.oss@gmail.com
In reply to: Tatsuo Ishii (#34)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Sat, Sep 14, 2024 at 1:42 PM Tatsuo Ishii <ishii@postgresql.org> wrote:

or the case when the last usage fit in memory but an earlier
usage spilled to disk.

In my understanding once tuplestore changes the storage type to disk,
it never returns to the memory storage type in terms of
tuplestore_get_stats. i.e. once state->usedDisk is set to true, it
never goes back to false. So the test case is not necessary.
David, am I correct?

I understand that. I am requesting a testcase to test that same logic.

Maybe something like this? In the example below there are 2
partitions. the first one has 1998 rows and the second one has 2
rows. Assuming that work_mem is 64kB, the first one does not fit the
memory and spills to disk. The second partition fits memory. However as
state->usedDisk remains true, explain shows "Storage: Disk".

test=# explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n));
n QUERY PLAN

--------------------------------------------------------------------------------
-------------
WindowAgg (actual time=1.958..473328.589 rows=2000 loops=1)
Storage: Disk Maximum Storage: 65kB
-> Sort (actual time=1.008..1.277 rows=2000 loops=1)
Sort Key: ((a.n < 3))
Sort Method: external merge Disk: 48kB
-> Function Scan on generate_series a (actual time=0.300..0.633 rows=2
000 loops=1)
Planning Time: 0.069 ms
Execution Time: 474515.476 ms
(8 rows)

Thanks. This will do. Is there a way to force the larger partition to
be computed first? That way we definitely know that the last
computation was done when all the tuples in the tuplestore were in
memory.

--
Best Wishes,
Ashutosh Bapat

#36Tatsuo Ishii
ishii@postgresql.org
In reply to: Ashutosh Bapat (#35)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Hi Ashutosh,

Thank you for the review.

Thanks. This will do. Is there a way to force the larger partition to
be computed first? That way we definitely know that the last
computation was done when all the tuples in the tuplestore were in
memory.

Not sure if there's any way to force it in the SQL standard. However
in term of implementation, PostgreSQL sorts the function
(generate_series) scan result using a sort key "a.n < 3", which
results in rows being >= 2 first (as false == 0), then rows being < 3
(as true == 1). So unless PostgreSQL changes the way to sort boolean
data type, I think the result should be stable.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#37Tatsuo Ishii
ishii@postgresql.org
In reply to: Tatsuo Ishii (#36)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Not sure if there's any way to force it in the SQL standard. However
in term of implementation, PostgreSQL sorts the function
(generate_series) scan result using a sort key "a.n < 3", which
results in rows being >= 2 first (as false == 0), then rows being < 3
(as true == 1). So unless PostgreSQL changes the way to sort boolean
data type, I think the result should be stable.

Attached is the v5 patch. The difference from v4 is addtion of two
more tests to explain.sql:

1) spils to disk case
2) splis to disk then switch back to memory case

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v5-0001-Add-memory-disk-usage-for-Window-aggregate-nodes-.patchtext/x-patch; charset=us-asciiDownload
From 23d5d62ef3c08117a0e452a57264477a3f9aba08 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Tue, 17 Sep 2024 10:59:07 +0900
Subject: [PATCH v5] Add memory/disk usage for Window aggregate nodes in
 EXPLAIN.

This commit is similar to 1eff8279d and expands the idea to Window
aggregate nodes so that users can know how much memory or disk the
tuplestore used.

This commit uses newly introduced tuplestore_get_stats() to query this
information and add some additional output in EXPLAIN ANALYZE to
display the information for the Window aggregate node.
---
 src/backend/commands/explain.c        | 68 ++++++++++++++++++++-------
 src/test/regress/expected/explain.out | 37 +++++++++++++++
 src/test/regress/sql/explain.sql      |  8 ++++
 3 files changed, 95 insertions(+), 18 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 2819e479f8..aaec439892 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -120,6 +120,7 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
 								 List *ancestors, ExplainState *es);
 static void show_sortorder_options(StringInfo buf, Node *sortexpr,
 								   Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
 static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
@@ -127,6 +128,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
+static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
+			show_windowagg_info(castNode(WindowAggState, planstate), es);
 			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
@@ -2894,6 +2897,34 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
 	}
 }
 
+/*
+ * Show information on storage method and maximum memory/disk space used.
+ */
+static void
+show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
+{
+	char	   *maxStorageType;
+	int64		maxSpaceUsed,
+				maxSpaceUsedKB;
+
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", maxStorageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 maxStorageType,
+						 maxSpaceUsedKB);
+	}
+}
+
 /*
  * Show TABLESAMPLE properties
  */
@@ -3350,9 +3381,6 @@ static void
 show_material_info(MaterialState *mstate, ExplainState *es)
 {
 	Tuplestorestate *tupstore = mstate->tuplestorestate;
-	char	   *maxStorageType;
-	int64		maxSpaceUsed,
-				maxSpaceUsedKB;
 
 	/*
 	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
@@ -3361,22 +3389,26 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	if (!es->analyze || tupstore == NULL)
 		return;
 
-	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
-	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+	show_storage_info(tupstore, es);
+}
 
-	if (es->format != EXPLAIN_FORMAT_TEXT)
-	{
-		ExplainPropertyText("Storage", maxStorageType, es);
-		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
-	}
-	else
-	{
-		ExplainIndentText(es);
-		appendStringInfo(es->str,
-						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
-						 maxStorageType,
-						 maxSpaceUsedKB);
-	}
+/*
+ * Show information on WindowAgg node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_windowagg_info(WindowAggState *winstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = winstate->buffer;
+
+	/*
+	 * Nothing to show if ANALYZE option wasn't used or if execution didn't
+	 * get as far as creating the tuplestore.
+	 */
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
 }
 
 /*
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 6585c6a69e..a9ead5b36f 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -691,3 +691,40 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem
  Execution Time: N.N ms
 (4 rows)
 
+-- Test tuplestore storage usage in Window aggregate (memory case)
+select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,10) a(n)');
+                                 explain_filter                                 
+--------------------------------------------------------------------------------
+ WindowAgg (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+   ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(5 rows)
+
+-- Test tuplestore storage usage in Window aggregate (disk case)
+set work_mem to 64;
+select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,2000) a(n)');
+                                 explain_filter                                 
+--------------------------------------------------------------------------------
+ WindowAgg (actual time=N.N..N.N rows=N loops=N)
+   Storage: Disk  Maximum Storage: NkB
+   ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(5 rows)
+
+-- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
+select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n))');
+                                    explain_filter                                    
+--------------------------------------------------------------------------------------
+ WindowAgg (actual time=N.N..N.N rows=N loops=N)
+   Storage: Disk  Maximum Storage: NkB
+   ->  Sort (actual time=N.N..N.N rows=N loops=N)
+         Sort Key: ((a.n < N))
+         Sort Method: external merge  Disk: NkB
+         ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(8 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index c7055f850c..54ecfcba63 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -169,3 +169,11 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec
 select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8');
 -- this tests an edge case where we have no data to return
 select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8');
+
+-- Test tuplestore storage usage in Window aggregate (memory case)
+select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,10) a(n)');
+-- Test tuplestore storage usage in Window aggregate (disk case)
+set work_mem to 64;
+select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,2000) a(n)');
+-- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
+select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n))');
-- 
2.25.1

#38David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#37)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Tue, 17 Sept 2024 at 14:40, Tatsuo Ishii <ishii@postgresql.org> wrote:

Attached is the v5 patch. The difference from v4 is addtion of two
more tests to explain.sql:

1) spils to disk case
2) splis to disk then switch back to memory case

Looks ok to me, aside from the missing "reset work_mem;" after you're
done with testing the disk spilling code.

David

#39Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#38)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Tue, 17 Sept 2024 at 14:40, Tatsuo Ishii <ishii@postgresql.org> wrote:

Attached is the v5 patch. The difference from v4 is addtion of two
more tests to explain.sql:

1) spils to disk case
2) splis to disk then switch back to memory case

Looks ok to me, aside from the missing "reset work_mem;" after you're
done with testing the disk spilling code.

Thanks. I have added it and pushed the patch.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#40Tatsuo Ishii
ishii@postgresql.org
In reply to: Tatsuo Ishii (#39)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

Thanks. I have added it and pushed the patch.

So I have created patches to do the same for CTE scan and table
function scan node. Patch attached.

Actually there's one more executor node type that uses tuplestore:
recursive union (used in "with recursive"). The particular node type
uses two tuplestore and we cannot simply apply tuplestore_get_stats()
to the node type. We need to modify RecursiveUnionState to track the
maximum tuplestore usage. I am not sure this would be worth the
effort. Opinion?

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v1-0001-Add-memory-disk-usage-for-CTE-scan-and-table-func.patchtext/x-patch; charset=us-asciiDownload
From e2c8d0257d4b0e11a9ef9cfe632d8ea1806a08b7 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 18 Sep 2024 20:54:58 +0900
Subject: [PATCH v1] Add memory/disk usage for CTE scan and table function scan
 nodes in EXPLAIN.

This commit is similar to 95d6e9af07, expanding the idea to CTE scan
and table function scan nodes so that the maximum tuplestore memory
or disk usage is shown with EXPLAIN ANALYZE command.
---
 src/backend/commands/explain.c        | 35 +++++++++++++++++++++++++++
 src/test/regress/expected/explain.out | 20 +++++++++++++++
 src/test/regress/sql/explain.sql      |  4 +++
 3 files changed, 59 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index aaec439892..34a23428c3 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -129,6 +129,8 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
+static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
+static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2046,6 +2048,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			if (IsA(plan, CteScan))
+				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
 		case T_Gather:
 			{
@@ -2127,6 +2131,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			show_table_func_scan_info(castNode(TableFuncScanState, planstate), es);
 			break;
 		case T_TidScan:
 			{
@@ -3411,6 +3416,36 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es)
 	show_storage_info(tupstore, es);
 }
 
+/*
+ * Show information on CTE Scan node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on Table Function Scan node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = tscanstate->tupstore;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index d01c304c24..b64e64bb43 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -729,3 +729,23 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition
 (8 rows)
 
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+                                 explain_filter                                 
+--------------------------------------------------------------------------------
+ Aggregate (actual time=N.N..N.N rows=N loops=N)
+   ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
+                              explain_filter                               
+---------------------------------------------------------------------------
+ Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index b861e2b53d..0623440124 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -178,3 +178,7 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
 select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n))');
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
-- 
2.25.1

#41David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#40)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 00:13, Tatsuo Ishii <ishii@postgresql.org> wrote:

Actually there's one more executor node type that uses tuplestore:
recursive union (used in "with recursive"). The particular node type
uses two tuplestore and we cannot simply apply tuplestore_get_stats()
to the node type. We need to modify RecursiveUnionState to track the
maximum tuplestore usage. I am not sure this would be worth the
effort. Opinion?

Could you add the two sizes together and take the storage type from
the tuplestore with the highest storage size?

David

#42Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#41)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 00:13, Tatsuo Ishii <ishii@postgresql.org> wrote:

Actually there's one more executor node type that uses tuplestore:
recursive union (used in "with recursive"). The particular node type
uses two tuplestore and we cannot simply apply tuplestore_get_stats()
to the node type. We need to modify RecursiveUnionState to track the
maximum tuplestore usage. I am not sure this would be worth the
effort. Opinion?

Could you add the two sizes together and take the storage type from
the tuplestore with the highest storage size?

I don't think this works because tuplestore_begin/tuplestore_end are
called while executing the node (ExecRecursiveUnion).

I think the way you are proposing only shows the stats last time when
those tuplestore are created.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#43David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#42)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 12:01, Tatsuo Ishii <ishii@postgresql.org> wrote:

Could you add the two sizes together and take the storage type from
the tuplestore with the highest storage size?

I don't think this works because tuplestore_begin/tuplestore_end are
called while executing the node (ExecRecursiveUnion).

I think the way you are proposing only shows the stats last time when
those tuplestore are created.

That code could be modified to swap the tuplestores and do a
tuplestore_clear() instead of tuplestore_end() followed by
tuplestore_begin_heap().

It's likely worthwhile from a performance point of view. Here's a
small test as an example:

master:
postgres=# with recursive cte (a) as (select 1 union all select
cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte;
Time: 219.023 ms
Time: 218.828 ms
Time: 219.093 ms

with attached patched:
postgres=# with recursive cte (a) as (select 1 union all select
cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte;
Time: 169.734 ms
Time: 164.841 ms
Time: 169.168 ms

David

Attachments:

reuse_tuplestore_in_recursive_union.patch.txttext/plain; charset=US-ASCII; name=reuse_tuplestore_in_recursive_union.patch.txtDownload
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index c7f8a19fa4..8aa7a78d0c 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -115,19 +115,26 @@ ExecRecursiveUnion(PlanState *pstate)
 		slot = ExecProcNode(innerPlan);
 		if (TupIsNull(slot))
 		{
+			Tuplestorestate *swaptemp;
+
 			/* Done if there's nothing in the intermediate table */
 			if (node->intermediate_empty)
 				break;
 
-			/* done with old working table ... */
-			tuplestore_end(node->working_table);
+			/*
+			 * Now we let the intermediate table become the work table.  We
+			 * need a fresh intermediate table, so delete the tuples from
+			 * the current working table and use that as the new intermediate
+			 * table.  This saves a round of free/malloc from creating a new
+			 * tuple store.
+			 */
+			tuplestore_clear(node->working_table);
 
-			/* intermediate table becomes working table */
+			swaptemp = node->working_table;
 			node->working_table = node->intermediate_table;
+			node->intermediate_table = swaptemp;
 
-			/* create new empty intermediate table */
-			node->intermediate_table = tuplestore_begin_heap(false, false,
-															 work_mem);
+			/* mark the intermediate table as empty */
 			node->intermediate_empty = true;
 
 			/* reset the recursive term */
#44Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#43)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

That code could be modified to swap the tuplestores and do a
tuplestore_clear() instead of tuplestore_end() followed by
tuplestore_begin_heap().

It's likely worthwhile from a performance point of view. Here's a
small test as an example:

master:
postgres=# with recursive cte (a) as (select 1 union all select
cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte;
Time: 219.023 ms
Time: 218.828 ms
Time: 219.093 ms

with attached patched:
postgres=# with recursive cte (a) as (select 1 union all select
cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte;
Time: 169.734 ms
Time: 164.841 ms
Time: 169.168 ms

Impressive result. I also ran your query with count 1000.

without the patch:
Time: 3.655 ms
Time: 4.123 ms
Time: 2.163 ms

wit the patch:
Time: 3.641 ms
Time: 2.356 ms
Time: 2.347 ms

It seems with the patch the performance is slightly better or almost
same. I think the patch improves the performance without sacrificing
the smaller iteration case.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

#45David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#44)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 13:49, Tatsuo Ishii <ishii@postgresql.org> wrote:

I also ran your query with count 1000.

without the patch:
Time: 3.655 ms
Time: 4.123 ms
Time: 2.163 ms

wit the patch:
Time: 3.641 ms
Time: 2.356 ms
Time: 2.347 ms

It seems with the patch the performance is slightly better or almost
same. I think the patch improves the performance without sacrificing
the smaller iteration case.

You might need to use pgbench to get more stable results with such a
small test. If your CPU clocks down when idle, it's not going to clock
up as fast as you might like it to when you give it something to do.

Here's what I got when running 1000 iterations:

$ cat bench.sql
with recursive cte (a) as (select 1 union all select cte.a+1 from cte
where cte.a+1 <= 1000) select count(*) from cte;

master
$ for i in {1..5}; do pgbench -n -f bench.sql -T 10 -M prepared
postgres | grep latency; done
latency average = 0.251 ms
latency average = 0.254 ms
latency average = 0.251 ms
latency average = 0.252 ms
latency average = 0.253 ms

patched
$ for i in {1..5}; do pgbench -n -f bench.sql -T 10 -M prepared
postgres | grep latency; done
latency average = 0.202 ms
latency average = 0.202 ms
latency average = 0.207 ms
latency average = 0.202 ms
latency average = 0.202 ms (~24.2% faster)

David

#46David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#44)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 13:49, Tatsuo Ishii <ishii@postgresql.org> wrote:

That code could be modified to swap the tuplestores and do a
tuplestore_clear() instead of tuplestore_end() followed by
tuplestore_begin_heap().

Impressive result. I also ran your query with count 1000.

I've pushed that patch. That should now unblock you on the
nodeRecursiveunion.c telemetry.

David

#47Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#46)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

That code could be modified to swap the tuplestores and do a
tuplestore_clear() instead of tuplestore_end() followed by
tuplestore_begin_heap().

Impressive result. I also ran your query with count 1000.

I've pushed that patch. That should now unblock you on the
nodeRecursiveunion.c telemetry.

Thanks. Attached is a patch for CTE scan, table function scan and
recursive union scan nodes.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v2-0001-Add-memory-disk-usage-for-more-executor-nodes.patchtext/x-patch; charset=us-asciiDownload
From e9e933bf959e54018fa20feca2034567024e551e Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Thu, 19 Sep 2024 13:13:50 +0900
Subject: [PATCH v2] Add memory/disk usage for more executor nodes.

This commit is similar to 95d6e9af07, expanding the idea to CTE scan,
table function scan and recursive union scan nodes so that the maximum
tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command.
---
 src/backend/commands/explain.c        | 97 +++++++++++++++++++++++++++
 src/test/regress/expected/explain.out | 38 +++++++++++
 src/test/regress/sql/explain.sql      |  6 ++
 3 files changed, 141 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index aaec439892..b5f8f34df0 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -129,6 +129,9 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
+static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
+static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es);
+static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2046,6 +2049,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			if (IsA(plan, CteScan))
+				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
 		case T_Gather:
 			{
@@ -2127,6 +2132,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			show_table_func_scan_info(castNode(TableFuncScanState, planstate), es);
 			break;
 		case T_TidScan:
 			{
@@ -2278,6 +2284,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_memoize_info(castNode(MemoizeState, planstate), ancestors,
 							  es);
 			break;
+		case T_RecursiveUnion:
+			show_recursive_union_info(castNode(RecursiveUnionState, planstate),
+									  es);
+			break;
 		default:
 			break;
 	}
@@ -3411,6 +3421,93 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es)
 	show_storage_info(tupstore, es);
 }
 
+/*
+ * Show information on CTE Scan node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on Table Function Scan node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = tscanstate->tupstore;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on Recursive Union node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es)
+{
+	char	   *maxStorageType;
+	char	   *tempStorageType;
+	int64		maxSpaceUsed,
+				maxSpaceUsedKB,
+				tempSpaceUsed;
+	Tuplestorestate *working_table,
+			   *intermediate_table;
+
+	/*
+	 * Recursive union node uses two tuplestore.  We employ one of them which
+	 * consumed larger memory/disk usage.
+	 */
+	working_table = rstate->working_table;
+	intermediate_table = rstate->intermediate_table;
+
+	if (!es->analyze ||
+		(working_table == NULL && intermediate_table == NULL))
+		return;
+
+	tempSpaceUsed = maxSpaceUsed = 0;
+
+	if (working_table != NULL)
+		tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed);
+
+	if (intermediate_table != NULL)
+		tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed);
+
+	if (tempSpaceUsed > maxSpaceUsed)
+	{
+		maxStorageType = tempStorageType;
+		maxSpaceUsed = tempSpaceUsed;
+	}
+
+	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", maxStorageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 maxStorageType,
+						 maxSpaceUsedKB);
+	}
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index d01c304c24..87bfaa6d79 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -729,3 +729,41 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition
 (8 rows)
 
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+                                 explain_filter                                 
+--------------------------------------------------------------------------------
+ Aggregate (actual time=N.N..N.N rows=N loops=N)
+   ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
+                              explain_filter                               
+---------------------------------------------------------------------------
+ Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- Test tuplestore storage usage in recurive union scan
+select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n < 10) select sum(n) from t');
+                               explain_filter                                
+-----------------------------------------------------------------------------
+ Aggregate (actual time=N.N..N.N rows=N loops=N)
+   CTE t
+     ->  Recursive Union (actual time=N.N..N.N rows=N loops=N)
+           Storage: Memory  Maximum Storage: NkB
+           ->  Result (actual time=N.N..N.N rows=N loops=N)
+           ->  WorkTable Scan on t t_1 (actual time=N.N..N.N rows=N loops=N)
+                 Filter: (n < N)
+                 Rows Removed by Filter: N
+   ->  CTE Scan on t (actual time=N.N..N.N rows=N loops=N)
+         Storage: Memory  Maximum Storage: NkB
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(12 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index b861e2b53d..5d5c68443e 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -178,3 +178,9 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
 select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n))');
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
+-- Test tuplestore storage usage in recurive union scan
+select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n < 10) select sum(n) from t');
-- 
2.25.1

#48David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#47)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 16:21, Tatsuo Ishii <ishii@postgresql.org> wrote:

Thanks. Attached is a patch for CTE scan, table function scan and
recursive union scan nodes.

1. It's probably a minor detail, but in show_recursive_union_info(), I
don't think the tuplestores can ever be NULL.

+ if (working_table != NULL)
+ tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed);
+
+ if (intermediate_table != NULL)
+ tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed);

I added the NULL tests for the Materialize case as the tuplestore is
created in ExecMaterial() rather than ExecInitMaterial(). For the two
tuplestorestates above, they're both created in
ExecInitRecursiveUnion().

2. I imagined you'd always do maxSpaceUsed += tempSpaceUsed; or
maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed + tempSpaceUsed);

+ if (tempSpaceUsed > maxSpaceUsed)
+ {
+ maxStorageType = tempStorageType;
+ maxSpaceUsed = tempSpaceUsed;
+ }

Why do you think the the space used by the smaller tuplestore should
be ignored in the storage size output?

David

#49Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#48)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

1. It's probably a minor detail, but in show_recursive_union_info(), I
don't think the tuplestores can ever be NULL.

+ if (working_table != NULL)
+ tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed);
+
+ if (intermediate_table != NULL)
+ tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed);

I added the NULL tests for the Materialize case as the tuplestore is
created in ExecMaterial() rather than ExecInitMaterial(). For the two
tuplestorestates above, they're both created in
ExecInitRecursiveUnion().

You are right. Also I checked other Exec* in nodeRecursiveunion.c and
did not find any place where working_table and intermediate_table are
set to NULL.

2. I imagined you'd always do maxSpaceUsed += tempSpaceUsed; or
maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed + tempSpaceUsed);

+ if (tempSpaceUsed > maxSpaceUsed)
+ {
+ maxStorageType = tempStorageType;
+ maxSpaceUsed = tempSpaceUsed;
+ }

Why do you think the the space used by the smaller tuplestore should
be ignored in the storage size output?

I thought about the case when the two tuplestores have different
storage types. But I remember that we already use disk storage type
even if the storage type was changed from disk to memory[1]/messages/by-id/CAExHW5vRPRLvsZYLmNGcDLkPDWDHXGSWYjox-to-OsCVFETd3w@mail.gmail.com. So
probably we don't need to much worry about the storage kind difference
in the two tuplestores.

Attached patch fixes 1 & 2.

[1]: /messages/by-id/CAExHW5vRPRLvsZYLmNGcDLkPDWDHXGSWYjox-to-OsCVFETd3w@mail.gmail.com

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v3-0001-Add-memory-disk-usage-for-more-executor-nodes.patchtext/x-patch; charset=us-asciiDownload
From 814fa7bdafba2c004b923ce577c0c6f53cfc5387 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Thu, 19 Sep 2024 19:14:07 +0900
Subject: [PATCH v3] Add memory/disk usage for more executor nodes.

This commit is similar to 95d6e9af07, expanding the idea to CTE scan,
table function scan and recursive union scan nodes so that the maximum
tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command.
---
 src/backend/commands/explain.c        | 85 +++++++++++++++++++++++++++
 src/test/regress/expected/explain.out | 38 ++++++++++++
 src/test/regress/sql/explain.sql      |  6 ++
 3 files changed, 129 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index aaec439892..725c3e88db 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -129,6 +129,9 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
+static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
+static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es);
+static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2046,6 +2049,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			if (IsA(plan, CteScan))
+				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
 		case T_Gather:
 			{
@@ -2127,6 +2132,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			show_table_func_scan_info(castNode(TableFuncScanState, planstate), es);
 			break;
 		case T_TidScan:
 			{
@@ -2278,6 +2284,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_memoize_info(castNode(MemoizeState, planstate), ancestors,
 							  es);
 			break;
+		case T_RecursiveUnion:
+			show_recursive_union_info(castNode(RecursiveUnionState, planstate),
+									  es);
+			break;
 		default:
 			break;
 	}
@@ -3411,6 +3421,81 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es)
 	show_storage_info(tupstore, es);
 }
 
+/*
+ * Show information on CTE Scan node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on Table Function Scan node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = tscanstate->tupstore;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	show_storage_info(tupstore, es);
+}
+
+/*
+ * Show information on Recursive Union node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es)
+{
+	char	   *maxStorageType,
+			   *tempStorageType;
+	int64		maxSpaceUsed,
+				maxSpaceUsedKB,
+				tempSpaceUsed;
+
+	if (!es->analyze)
+		return;
+
+	/*
+	 * Recursive union node uses two tuplestores.  We employ the storage type
+	 * from one of them which consumed more memory/disk than the other.  The
+	 * storage size is sum of the two.
+	 */
+	tuplestore_get_stats(rstate->working_table, &tempStorageType, &tempSpaceUsed);
+	tuplestore_get_stats(rstate->intermediate_table, &maxStorageType, &maxSpaceUsed);
+
+	if (tempSpaceUsed > maxSpaceUsed)
+		maxStorageType = tempStorageType;
+
+	maxSpaceUsed += tempSpaceUsed;
+	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", maxStorageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 maxStorageType,
+						 maxSpaceUsedKB);
+	}
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index d01c304c24..87bfaa6d79 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -729,3 +729,41 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition
 (8 rows)
 
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+                                 explain_filter                                 
+--------------------------------------------------------------------------------
+ Aggregate (actual time=N.N..N.N rows=N loops=N)
+   ->  Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
+                              explain_filter                               
+---------------------------------------------------------------------------
+ Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N)
+   Storage: Memory  Maximum Storage: NkB
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- Test tuplestore storage usage in recurive union scan
+select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n < 10) select sum(n) from t');
+                               explain_filter                                
+-----------------------------------------------------------------------------
+ Aggregate (actual time=N.N..N.N rows=N loops=N)
+   CTE t
+     ->  Recursive Union (actual time=N.N..N.N rows=N loops=N)
+           Storage: Memory  Maximum Storage: NkB
+           ->  Result (actual time=N.N..N.N rows=N loops=N)
+           ->  WorkTable Scan on t t_1 (actual time=N.N..N.N rows=N loops=N)
+                 Filter: (n < N)
+                 Rows Removed by Filter: N
+   ->  CTE Scan on t (actual time=N.N..N.N rows=N loops=N)
+         Storage: Memory  Maximum Storage: NkB
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(12 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index b861e2b53d..5d5c68443e 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -178,3 +178,9 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
 select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000) a(n))');
 reset work_mem;
+-- Test tuplestore storage usage in CTE scan
+select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n) from w');
+-- Test tuplestore storage usage in table function scan
+select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))');
+-- Test tuplestore storage usage in recurive union scan
+select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n < 10) select sum(n) from t');
-- 
2.25.1

#50David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#49)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Thu, 19 Sept 2024 at 22:17, Tatsuo Ishii <ishii@postgresql.org> wrote:

Attached patch fixes 1 & 2.

I looked at this and thought that one thing you might want to consider
is adjusting show_storage_info() to accept the size and type
parameters so you don't have to duplicate the formatting code in
show_recursive_union_info().

The first of the new tests also isn't testing what you want it to
test. Maybe you could add a "materialized" in there to stop the CTE
being inlined:

explain (analyze,costs off) with w(n) as materialized (select n from
generate_series(1,10) a(n)) select sum(n) from w

Also, I'm on the fence about if the new tests are worthwhile. I won't
object to them, however. I just wanted to note that most of the
complexity is in tuplestore.c of which there's already coverage for.
The test's value is reduced by the fact that most of the interesting
details have to be masked out due to possible platform variations in
the number of bytes. Really the new tests are only testing that we
display the storage details and maybe that the storage type came out
as expected. It seems unlikely these would get broken. I'd say it's
committers preference, however. I just wanted to add my thoughts. You
have to offset the value against the fact that the expected output is
likely to change over the years which adds to the burden of making
changes to the EXPLAIN output.

David

#51Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#50)
1 attachment(s)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

I looked at this and thought that one thing you might want to consider
is adjusting show_storage_info() to accept the size and type
parameters so you don't have to duplicate the formatting code in
show_recursive_union_info().

I agree and made necessary changes. See attached v4 patches.

The first of the new tests also isn't testing what you want it to
test. Maybe you could add a "materialized" in there to stop the CTE
being inlined:

explain (analyze,costs off) with w(n) as materialized (select n from
generate_series(1,10) a(n)) select sum(n) from w

Also, I'm on the fence about if the new tests are worthwhile. I won't
object to them, however. I just wanted to note that most of the
complexity is in tuplestore.c of which there's already coverage for.
The test's value is reduced by the fact that most of the interesting
details have to be masked out due to possible platform variations in
the number of bytes. Really the new tests are only testing that we
display the storage details and maybe that the storage type came out
as expected. It seems unlikely these would get broken. I'd say it's
committers preference, however. I just wanted to add my thoughts. You
have to offset the value against the fact that the expected output is
likely to change over the years which adds to the burden of making
changes to the EXPLAIN output.

After thinking more, I lean toward to your opinion. The new tests do
not give big value, but on the other hand they could become a burden
over the years. I do not include the new tests in the v4 patches.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

Attachments:

v4-0001-Add-memory-disk-usage-for-more-executor-nodes.patchtext/x-patch; charset=us-asciiDownload
From b94c524623a27f526f03de5e90564b185e37ffdd Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Mon, 23 Sep 2024 14:57:39 +0900
Subject: [PATCH v4] Add memory/disk usage for more executor nodes.

This commit is similar to 95d6e9af07, expanding the idea to CTE scan,
table function scan and recursive union scan nodes so that the maximum
tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command.

Also adjust show_storage_info() so that it accepts storage type and
storage size arguments instead of Tuplestorestate. This makes it
possible for the node types to share the formatting code in
show_storage_info(). Due to this show_material_info() and
show_windowagg_info() are also modified.
---
 src/backend/commands/explain.c | 107 ++++++++++++++++++++++++++++++---
 1 file changed, 97 insertions(+), 10 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index aaec439892..ee1bcb84e2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -120,7 +120,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
 								 List *ancestors, ExplainState *es);
 static void show_sortorder_options(StringInfo buf, Node *sortexpr,
 								   Oid sortOperator, Oid collation, bool nullsFirst);
-static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es);
+static void show_storage_info(char *maxStorageType, int64 maxSpaceUsed,
+							  ExplainState *es);
 static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
@@ -129,6 +130,11 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_windowagg_info(WindowAggState *winstate, ExplainState *es);
+static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es);
+static void show_table_func_scan_info(TableFuncScanState *tscanstate,
+									  ExplainState *es);
+static void show_recursive_union_info(RecursiveUnionState *rstate,
+									  ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2046,6 +2052,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			if (IsA(plan, CteScan))
+				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
 		case T_Gather:
 			{
@@ -2127,6 +2135,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
+			show_table_func_scan_info(castNode(TableFuncScanState,
+											   planstate), es);
 			break;
 		case T_TidScan:
 			{
@@ -2278,6 +2288,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_memoize_info(castNode(MemoizeState, planstate), ancestors,
 							  es);
 			break;
+		case T_RecursiveUnion:
+			show_recursive_union_info(castNode(RecursiveUnionState,
+											   planstate), es);
+			break;
 		default:
 			break;
 	}
@@ -2901,14 +2915,9 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
  * Show information on storage method and maximum memory/disk space used.
  */
 static void
-show_storage_info(Tuplestorestate *tupstore, ExplainState *es)
+show_storage_info(char *maxStorageType, int64 maxSpaceUsed, ExplainState *es)
 {
-	char	   *maxStorageType;
-	int64		maxSpaceUsed,
-				maxSpaceUsedKB;
-
-	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
-	maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
+	int64		maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed);
 
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
@@ -3380,6 +3389,9 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 static void
 show_material_info(MaterialState *mstate, ExplainState *es)
 {
+	char	   *maxStorageType;
+	int64		maxSpaceUsed;
+
 	Tuplestorestate *tupstore = mstate->tuplestorestate;
 
 	/*
@@ -3389,7 +3401,8 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 	if (!es->analyze || tupstore == NULL)
 		return;
 
-	show_storage_info(tupstore, es);
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	show_storage_info(maxStorageType, maxSpaceUsed, es);
 }
 
 /*
@@ -3399,6 +3412,9 @@ show_material_info(MaterialState *mstate, ExplainState *es)
 static void
 show_windowagg_info(WindowAggState *winstate, ExplainState *es)
 {
+	char	   *maxStorageType;
+	int64		maxSpaceUsed;
+
 	Tuplestorestate *tupstore = winstate->buffer;
 
 	/*
@@ -3408,7 +3424,78 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es)
 	if (!es->analyze || tupstore == NULL)
 		return;
 
-	show_storage_info(tupstore, es);
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	show_storage_info(maxStorageType, maxSpaceUsed, es);
+}
+
+/*
+ * Show information on CTE Scan node, storage method and maximum memory/disk
+ * space used.
+ */
+static void
+show_ctescan_info(CteScanState *ctescanstate, ExplainState *es)
+{
+	char	   *maxStorageType;
+	int64		maxSpaceUsed;
+
+	Tuplestorestate *tupstore = ctescanstate->leader->cte_table;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	show_storage_info(maxStorageType, maxSpaceUsed, es);
+}
+
+/*
+ * Show information on Table Function Scan node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es)
+{
+	char	   *maxStorageType;
+	int64		maxSpaceUsed;
+
+	Tuplestorestate *tupstore = tscanstate->tupstore;
+
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed);
+	show_storage_info(maxStorageType, maxSpaceUsed, es);
+}
+
+/*
+ * Show information on Recursive Union node, storage method and maximum
+ * memory/disk space used.
+ */
+static void
+show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es)
+{
+	char	   *maxStorageType,
+			   *tempStorageType;
+	int64		maxSpaceUsed,
+				tempSpaceUsed;
+
+	if (!es->analyze)
+		return;
+
+	/*
+	 * Recursive union node uses two tuplestores.  We employ the storage type
+	 * from one of them which consumed more memory/disk than the other.  The
+	 * storage size is sum of the two.
+	 */
+	tuplestore_get_stats(rstate->working_table, &tempStorageType,
+						 &tempSpaceUsed);
+	tuplestore_get_stats(rstate->intermediate_table, &maxStorageType,
+						 &maxSpaceUsed);
+
+	if (tempSpaceUsed > maxSpaceUsed)
+		maxStorageType = tempStorageType;
+
+	maxSpaceUsed += tempSpaceUsed;
+	show_storage_info(maxStorageType, maxSpaceUsed, es);
 }
 
 /*
-- 
2.25.1

#52David Rowley
dgrowleyml@gmail.com
In reply to: Tatsuo Ishii (#51)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Mon, 23 Sept 2024 at 18:28, Tatsuo Ishii <ishii@postgresql.org> wrote:

I agree and made necessary changes. See attached v4 patches.

Looks good to me.

David

#53Tatsuo Ishii
ishii@postgresql.org
In reply to: David Rowley (#52)
Re: Add memory/disk usage for WindowAgg nodes in EXPLAIN

On Mon, 23 Sept 2024 at 18:28, Tatsuo Ishii <ishii@postgresql.org> wrote:

I agree and made necessary changes. See attached v4 patches.

Looks good to me.

Thank you for the review! I have pushed the patch.

Best reagards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp