SupportRequestRows support function for generate_series_timestamptz
When looking at [1]/messages/by-id/CAMPYKo0FouB-HZ1k-_Ur2v+kK71q0T5icQGrp+SPbQJGq0H2Rw@mail.gmail.com, I noticed that we don't have a prosupport
function for the timestamp version of generate_series.
We have this for the integer versions of generate_series(), per:
postgres=# explain analyze select * from generate_series(1, 256, 2);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..1.28 rows=128 width=4)
(actual time=0.142..0.183 rows=128 loops=1)
The timestamp version just gives the default 1000 row estimate:
postgres=# explain analyze select * from generate_series('2024-01-01',
'2025-01-01', interval '1 day');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..10.00 rows=1000
width=8) (actual time=0.604..0.718 rows=367 loops=1)
I had some spare time today, so wrote a patch, which gives you:
postgres=# explain analyze select * from generate_series('2024-01-01',
'2025-01-01', interval '1 day');
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..3.67 rows=367 width=8)
(actual time=0.258..0.291 rows=367 loops=1)
This required a bit of hackery to not have timestamp_mi() error out in
the planner when the timestamp difference calculation overflows. I
considered adding ereturn support to fix that, but that felt like
opening Pandora's box. Instead, I added some pre-checks similar to
what's in timestamp_mi() to have the support function fall back on the
1000 row estimate when there will be an overflow.
Also, there's no interval_div, so the patch has a macro that converts
interval to microseconds and does floating point division. I think
that's good enough for row estimations.
I'll park this here until July CF.
(I understand this doesn't help the case in [1]/messages/by-id/CAMPYKo0FouB-HZ1k-_Ur2v+kK71q0T5icQGrp+SPbQJGq0H2Rw@mail.gmail.com as the generate_series
inputs are not const there)
David
[1]: /messages/by-id/CAMPYKo0FouB-HZ1k-_Ur2v+kK71q0T5icQGrp+SPbQJGq0H2Rw@mail.gmail.com
Attachments:
v1-0001-Add-support-function-for-generate_series-for-time.patchtext/plain; charset=US-ASCII; name=v1-0001-Add-support-function-for-generate_series-for-time.patchDownload
From ca0e982215d1335d015a2b515f038b7186935af0 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Sun, 14 Apr 2024 14:49:39 +1200
Subject: [PATCH v1] Add support function for generate_series for timestamps
This provides the planner with row estimates for
generate_series(TIMESTAMP, TIMESTAMP, INTERVAL) and
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL)
---
src/backend/utils/adt/timestamp.c | 83 +++++++++++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 9 +++-
2 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 963f2ec74a..25f2680243 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -27,6 +27,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
@@ -6668,6 +6669,88 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1, *arg2, *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ arg3 = estimate_expression_value(req->root, lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
+ {
+ Timestamp start, finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *) arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *) arg3)->constvalue);
+
+ /*
+ * Protect against overflows in timestamp_mi. XXX convert to
+ * ereturn one day?
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start, &dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+ TimestampGetDatum(finish),
+ TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ if (dstep != 0.0)
+ {
+ Interval *idiff = DatumGetIntervalP(diff);
+ double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep + 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 134e3b22fd..279ff2384b 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8328,15 +8328,20 @@
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
--
2.40.1.windows.1
On Sun, 14 Apr 2024 at 15:14, David Rowley <dgrowleyml@gmail.com> wrote:
I had some spare time today, so wrote a patch, which gives you:
postgres=# explain analyze select * from generate_series('2024-01-01',
'2025-01-01', interval '1 day');
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Function Scan on generate_series (cost=0.00..3.67 rows=367 width=8)
(actual time=0.258..0.291 rows=367 loops=1)
Here's v2 of the patch with some added regression tests.
I did this by writing a plpgsql function named explain_mask_costs()
which has various boolean parameters to mask out the various portions
of the costs. I wondered if this function should live somewhere else
as it seems applicable to more than just misc_functions.sql. Maybe
test_setup.sql. I'll leave where it is for now unless anyone thinks
differently.
This is a fairly simple and seemingly non-controversial patch. I plan
to push it in the next few days unless there's some feedback before
then.
David
Attachments:
v2-0001-Add-support-function-for-generate_series-for-time.patchapplication/octet-stream; name=v2-0001-Add-support-function-for-generate_series-for-time.patchDownload
From e7b12bf6752cfd296d2365760cdad476752367cb Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Sun, 14 Apr 2024 14:49:39 +1200
Subject: [PATCH v2] Add support function for generate_series for timestamps
This provides the planner with row estimates for
generate_series(TIMESTAMP, TIMESTAMP, INTERVAL) and
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL)
---
src/backend/utils/adt/timestamp.c | 87 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 9 +-
src/test/regress/expected/misc_functions.out | 92 ++++++++++++++++++++
src/test/regress/sql/misc_functions.sql | 74 ++++++++++++++++
4 files changed, 260 insertions(+), 2 deletions(-)
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index cdc7e43b93..29bdba92a5 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -27,6 +27,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
@@ -6680,6 +6681,92 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ arg3 = estimate_expression_value(req->root, lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
+ {
+ Timestamp start,
+ finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *) arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *) arg3)->constvalue);
+
+ /*
+ * Protect against overflows in timestamp_mi. XXX convert to
+ * ereturn one day?
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start, &dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+ TimestampGetDatum(finish),
+ TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ /* This equation works for either sign of step */
+ if (dstep != 0.0)
+ {
+ Interval *idiff = DatumGetIntervalP(diff);
+ double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep + 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e1001a4822..05ef765751 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8335,15 +8335,20 @@
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index d94056862a..def81d88d7 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -2,6 +2,43 @@
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
--
-- num_nulls()
--
@@ -594,6 +631,60 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
Index Cond: (unique1 = g.g)
(4 rows)
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity', 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '0 day') g(s);
+ERROR: step size cannot equal zero
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
ok
@@ -706,3 +797,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
(1 row)
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 76470fcb3f..48e5911176 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -4,6 +4,44 @@
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+
--
-- num_nulls()
--
@@ -224,6 +262,41 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity', 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '0 day') g(s);
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
SELECT count(*) > 0 AS ok FROM pg_control_init();
@@ -273,3 +346,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
FROM test_chunk_id;
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.34.1
looks good to me.
some minor questions:
/*
* Protect against overflows in timestamp_mi. XXX convert to
* ereturn one day?
*/
if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
!pg_sub_s64_overflow(finish, start, &dummy))
i don't understand the comment "XXX convert to ereturn one day?".
do we need to add unlikely for "pg_sub_s64_overflow", i saw most of
pg_sub_s64_overflow have unlikely.
On Mon, 8 Jul 2024 at 14:50, jian he <jian.universality@gmail.com> wrote:
looks good to me.
Thanks for looking.
/*
* Protect against overflows in timestamp_mi. XXX convert to
* ereturn one day?
*/
if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
!pg_sub_s64_overflow(finish, start, &dummy))i don't understand the comment "XXX convert to ereturn one day?".
The problem I'm trying to work around there is that timestamp_mi
raises an ERROR if there's an overflow. I don't want the support
function to cause an ERROR so I'm trying to only call timestamp_mi in
cases where it won't error. The ereturn mention is a reference to
ERROR raising infrastructure added by d9f7f5d32 and so far only used
by input functions. It would be possible to use that to save from
having to do the pg_sub_s64_overflow(). Instead, we could check if
any errors were found and only proceed with the remaining part of the
calculation if none were found.
I've tried to improve the comment in the attached version. I removed
the reference to ereturn.
do we need to add unlikely for "pg_sub_s64_overflow", i saw most of
pg_sub_s64_overflow have unlikely.
I was hoping the condition would be likely() rather than unlikely().
However, I didn't consider that the code path was hot enough for it to
matter. It's just a function we call once during planning if we find a
call to generate_series_timestamp(). It's not like it's called a
million or a billion times during execution like a function such as
int4pl() could be.
David
Attachments:
v3-0001-Add-support-function-for-generate_series-for-time.patchapplication/octet-stream; name=v3-0001-Add-support-function-for-generate_series-for-time.patchDownload
From 1ec7cc565e85e01bfc3cb2a8bc1f7ad4c542fb82 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Sun, 14 Apr 2024 14:49:39 +1200
Subject: [PATCH v3] Add support function for generate_series for timestamps
This provides the planner with row estimates for
generate_series(TIMESTAMP, TIMESTAMP, INTERVAL) and
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL)
---
src/backend/utils/adt/timestamp.c | 88 +++++++++++++++++++
src/include/catalog/pg_proc.dat | 9 +-
src/test/regress/expected/misc_functions.out | 92 ++++++++++++++++++++
src/test/regress/sql/misc_functions.sql | 74 ++++++++++++++++
4 files changed, 261 insertions(+), 2 deletions(-)
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index cdc7e43b93..69fe7860ed 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -27,6 +27,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
@@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ arg3 = estimate_expression_value(req->root, lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
+ {
+ Timestamp start,
+ finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *) arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *) arg3)->constvalue);
+
+ /*
+ * Perform some prechecks which could cause timestamp_mi to
+ * raise an ERROR. It's much better to just return some
+ * default estimate than error out in a support function.
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start, &dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+ TimestampGetDatum(finish),
+ TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ /* This equation works for either sign of step */
+ if (dstep != 0.0)
+ {
+ Interval *idiff = DatumGetIntervalP(diff);
+ double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep + 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e1001a4822..05ef765751 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8335,15 +8335,20 @@
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index d94056862a..def81d88d7 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -2,6 +2,43 @@
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
--
-- num_nulls()
--
@@ -594,6 +631,60 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
Index Cond: (unique1 = g.g)
(4 rows)
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity', 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '0 day') g(s);
+ERROR: step size cannot equal zero
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
ok
@@ -706,3 +797,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
(1 row)
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 76470fcb3f..48e5911176 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -4,6 +4,44 @@
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+
--
-- num_nulls()
--
@@ -224,6 +262,41 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('2024-03-01', '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity', 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series('2024-02-01', '2024-03-01', INTERVAL '0 day') g(s);
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
SELECT count(*) > 0 AS ok FROM pg_control_init();
@@ -273,3 +346,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
FROM test_chunk_id;
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.34.1
On Mon, Jul 8, 2024 at 12:02 PM David Rowley <dgrowleyml@gmail.com> wrote:
/*
* Protect against overflows in timestamp_mi. XXX convert to
* ereturn one day?
*/
if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
!pg_sub_s64_overflow(finish, start, &dummy))i don't understand the comment "XXX convert to ereturn one day?".
The problem I'm trying to work around there is that timestamp_mi
raises an ERROR if there's an overflow. I don't want the support
function to cause an ERROR so I'm trying to only call timestamp_mi in
cases where it won't error. The ereturn mention is a reference to
ERROR raising infrastructure added by d9f7f5d32 and so far only used
by input functions. It would be possible to use that to save from
having to do the pg_sub_s64_overflow(). Instead, we could check if
any errors were found and only proceed with the remaining part of the
calculation if none were found.I've tried to improve the comment in the attached version. I removed
the reference to ereturn.
got it.
{ oid => '2031',
proname => 'timestamp_mi', prorettype => 'interval',
proargtypes => 'timestamp timestamp', prosrc => 'timestamp_mi' },
{ oid => '1188',
proname => 'timestamptz_mi', prorettype => 'interval',
proargtypes => 'timestamptz timestamptz', prosrc => 'timestamp_mi' },
so this also apply to
{ oid => '938', descr => 'non-persistent series generator',
proname => 'generate_series', prorows => '1000', proretset => 't',
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
If so, then we need to update src/include/catalog/pg_proc.dat also?
On Mon, 8 Jul 2024 at 16:43, jian he <jian.universality@gmail.com> wrote:
{ oid => '2031',
proname => 'timestamp_mi', prorettype => 'interval',
proargtypes => 'timestamp timestamp', prosrc => 'timestamp_mi' },
{ oid => '1188',
proname => 'timestamptz_mi', prorettype => 'interval',
proargtypes => 'timestamptz timestamptz', prosrc => 'timestamp_mi' },
I'm not quite sure what you mean that needs to be adjusted with this.
so this also apply to
{ oid => '938', descr => 'non-persistent series generator',
proname => 'generate_series', prorows => '1000', proretset => 't',
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },If so, then we need to update src/include/catalog/pg_proc.dat also?
Oh, yeah. I missed setting the prosupport function for that one. Thanks.
I'm not sure when I realised there were 3 of these functions, but it
seems I didn't when I adjusted pg_proc.dat.
Updated patch attached.
David
Attachments:
v4-0001-Add-support-function-for-generate_series-for-time.patchapplication/octet-stream; name=v4-0001-Add-support-function-for-generate_series-for-time.patchDownload
From 2a9f969cdeb3876df5c4912fe72bd5c9704ca63b Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Sun, 14 Apr 2024 14:49:39 +1200
Subject: [PATCH v4] Add support function for generate_series for timestamps
This provides the planner with row estimates for
generate_series(TIMESTAMP, TIMESTAMP, INTERVAL),
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) and
generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL, TEXT).
---
src/backend/utils/adt/timestamp.c | 88 +++++++++++++++
src/include/catalog/pg_proc.dat | 12 +-
src/test/regress/expected/misc_functions.out | 110 +++++++++++++++++++
src/test/regress/sql/misc_functions.sql | 84 ++++++++++++++
4 files changed, 291 insertions(+), 3 deletions(-)
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index cdc7e43b93..69fe7860ed 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -27,6 +27,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
@@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ arg3 = estimate_expression_value(req->root, lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
+ {
+ Timestamp start,
+ finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *) arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *) arg3)->constvalue);
+
+ /*
+ * Perform some prechecks which could cause timestamp_mi to
+ * raise an ERROR. It's much better to just return some
+ * default estimate than error out in a support function.
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start, &dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+ TimestampGetDatum(finish),
+ TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ /* This equation works for either sign of step */
+ if (dstep != 0.0)
+ {
+ Interval *idiff = DatumGetIntervalP(diff);
+ double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep + 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e1001a4822..4d17fccbfd 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8331,19 +8331,25 @@
prorettype => 'numeric', proargtypes => 'numeric numeric',
prosrc => 'generate_series_numeric' },
{ oid => '938', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index d94056862a..78c91eff01 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -2,6 +2,43 @@
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
--
-- num_nulls()
--
@@ -594,6 +631,78 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
Index Cond: (unique1 = g.g)
(4 rows)
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+ERROR: step size cannot equal zero
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
ok
@@ -706,3 +815,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
(1 row)
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 76470fcb3f..f30387f54a 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -4,6 +4,44 @@
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+
--
-- num_nulls()
--
@@ -224,6 +262,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
SELECT count(*) > 0 AS ok FROM pg_control_init();
@@ -273,3 +356,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
FROM test_chunk_id;
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
--
2.34.1
On Mon, Jul 8, 2024 at 1:16 PM David Rowley <dgrowleyml@gmail.com> wrote:
Updated patch attached.
looking good to me.
On Mon, 8 Jul 2024 at 17:52, jian he <jian.universality@gmail.com> wrote:
On Mon, Jul 8, 2024 at 1:16 PM David Rowley <dgrowleyml@gmail.com> wrote:
Updated patch attached.
looking good to me.
Thanks for reviewing. I've pushed the patch now.
David