New function normal_rand_array function to contrib/tablefunc.
Here is a new function which could produce an array of numbers with a
controllable array length and duplicated elements in these arrays. I
used it when working with gin index, and I think it would be helpful for
others as well, so here it is.
select * from normal_rand_array(5, 10, 1.8::numeric, 3.5::numeric);
normal_rand_array
-----------------------------------------------
{3.3,2.3,2.7,3.2,2.0,2.7,3.4,2.7,2.3,2.9}
{3.3,1.8,2.9,3.4,2.0,1.8,2.0,3.5,2.8,2.5}
{2.1,1.9,2.3,1.9,2.5,2.7,2.4,2.9,1.8}
{2.3,2.5,2.4,2.7,2.7,2.3,2.9,3.3,3.3,1.9,3.5}
{2.8,3.4,2.7,1.8,3.3,2.3,2.2,3.5,2.6,2.5}
(5 rows)
select * from normal_rand_array(5, 10, 1.8::int4, 3.5::int4);
normal_rand_array
-------------------------------------
{3,2,2,3,4,2}
{2,4,2,3,3,3,3,2,2,3,3,2,3,2}
{2,4,3}
{4,2,3,4,2,4,2,2,3,4,3,3,2,4,4,2,3}
{4,3,3,4,3,3,4,2,4}
(5 rows)
the 5 means it needs to produce 5 rows in total and the 10 is the
average array length, and 1.8 is the minvalue for the random function
and 3.5 is the maxvalue.
--
Best Regards
Andy Fan
Attachments:
v20240608-0001-Add-function-normal_rand_array-function-to.patchtext/x-diffDownload
From 397dcaf67f29057b80aebbb6116b49ac8344547c Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Sat, 8 Jun 2024 13:21:08 +0800
Subject: [PATCH v20240608 1/1] Add function normal_rand_array function to
contrib/tablefunc.
It can produce an array of numbers with n controllable array length and
duplicated elements in these arrays.
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 26 ++++
contrib/tablefunc/sql/tablefunc.sql | 10 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 7 ++
contrib/tablefunc/tablefunc.c | 140 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 10 ++
src/backend/utils/adt/arrayfuncs.c | 7 ++
8 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..9f0cbbfbbe 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,32 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i;
+ERROR: unsupported type 25 in normal_rand_array.
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..dec57cfc66 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,16 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i;
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..9d13e80ff0
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION normal_rand_array(int4, int4, anyelement, anyelement)
+RETURNS setof anyarray
+AS 'MODULE_PATHNAME','normal_rand_array'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..6d26aa843b 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,13 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ int carry_len;
+ FunctionCallInfo fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} normal_rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +278,137 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * normal_rand_array - return requested number of random arrays
+ * with a Gaussian (Normal) distribution.
+ *
+ * inputs are int numvals, int mean_len, anyelement minvalue,
+ * anyelement maxvalue returns setof anyelement[]
+ */
+PG_FUNCTION_INFO_V1(normal_rand_array);
+Datum
+normal_rand_array(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ uint64 call_cntr;
+ uint64 max_calls;
+ normal_rand_array_fctx *fctx;
+ MemoryContext oldcontext;
+ Datum minvalue, maxvalue;
+ int array_mean_len;
+ Oid target_oid, random_fn_oid;
+
+ array_mean_len = PG_GETARG_INT32(1);
+ minvalue = PG_GETARG_DATUM(2);
+ maxvalue = PG_GETARG_DATUM(3);
+
+ target_oid = get_fn_expr_argtype(fcinfo->flinfo, 2);
+
+ if (target_oid == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (target_oid == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (target_oid == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (target_oid == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d in normal_rand_array.",
+ target_oid);
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ int32 num_tuples;
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* total number of tuples to be returned */
+ num_tuples = PG_GETARG_INT32(0);
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (normal_rand_array_fctx *) palloc(sizeof(normal_rand_array_fctx));
+
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2, InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = 0;
+ random_len_fcinfo->args[1].value = array_mean_len;
+
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2, InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minvalue;
+ random_val_fcinfo->args[1].value = maxvalue;
+
+ fctx->carry_len = -1;
+ fctx->fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ call_cntr = funcctx->call_cntr;
+ max_calls = funcctx->max_calls;
+ fctx = funcctx->user_fctx;
+
+ if (call_cntr < max_calls) /* do when there is more left to send */
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ if (fctx->carry_len != -1)
+ {
+ array_len = fctx->carry_len;
+ fctx->carry_len = -1;
+ }
+ else
+ {
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+ fctx->carry_len = 2 * array_mean_len - array_len;
+ }
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, target_oid)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..014c36b81c 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,16 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>normal_rand_array</function> ( <parameter>numvals</parameter> <type>integer</type>, <parameter>meanarraylen</parameter> <type>int4</type>, <parameter>minval</parameter> <type>anyelement</type>, <parameter>maxval</parameter> <type>anyelement</type> )
+ <returnvalue>setof anyarray</returnvalue>
+ </para>
+ <para>
+ Produces a set of normally distributed random array of numbers.
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..7c95cc05bc 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3397,6 +3397,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3429,6 +3435,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.2
It looks useful, for example, it can be used in sorting tests to make them more interesting. I just have one question. Why are you using SRF_IS_FIRST CALL and not _PG_init?
Best regards, Stepan Neretin.
Hi Andy
On 08.06.24 08:05, Andy Fan wrote:
Here is a new function which could produce an array of numbers with a
controllable array length and duplicated elements in these arrays. I
used it when working with gin index, and I think it would be helpful for
others as well, so here it is.select * from normal_rand_array(5, 10, 1.8::numeric, 3.5::numeric);
normal_rand_array
-----------------------------------------------
{3.3,2.3,2.7,3.2,2.0,2.7,3.4,2.7,2.3,2.9}
{3.3,1.8,2.9,3.4,2.0,1.8,2.0,3.5,2.8,2.5}
{2.1,1.9,2.3,1.9,2.5,2.7,2.4,2.9,1.8}
{2.3,2.5,2.4,2.7,2.7,2.3,2.9,3.3,3.3,1.9,3.5}
{2.8,3.4,2.7,1.8,3.3,2.3,2.2,3.5,2.6,2.5}
(5 rows)select * from normal_rand_array(5, 10, 1.8::int4, 3.5::int4);
normal_rand_array
-------------------------------------
{3,2,2,3,4,2}
{2,4,2,3,3,3,3,2,2,3,3,2,3,2}
{2,4,3}
{4,2,3,4,2,4,2,2,3,4,3,3,2,4,4,2,3}
{4,3,3,4,3,3,4,2,4}
(5 rows)the 5 means it needs to produce 5 rows in total and the 10 is the
average array length, and 1.8 is the minvalue for the random function
and 3.5 is the maxvalue.
When either minval or maxval exceeds int4 the function cannot be
executed/found
SELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
ERROR: function normal_rand_array(integer, integer, integer, bigint)
does not exist
LINE 1: SELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
^
HINT: No function matches the given name and argument types. You might
need to add explicit type casts.
---
SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42);
ERROR: function normal_rand_array(integer, integer, bigint, integer)
does not exist
LINE 1: SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42);
^
HINT: No function matches the given name and argument types. You might
need to add explicit type casts.
---
However, when both are int8 it works fine:
SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42::bigint);
normal_rand_array
--------------------------------------------------
{29,38,31,10,23,39,9,32}
{8,39,19,31,29,15,17,15,36,20,33,19}
{15,18,42,19}
{16,31,33,11,14,20,24,9,12,17,22,42,41,24,11,41}
{15,11,36,8,28,37}
(5 rows)
---
Is it the expected behaviour?
In some cases the function returns an empty array. Is it also expected?
SELECT count(*)
FROM normal_rand_array(100000, 10, 8, 42) i
WHERE array_length(i,1) IS NULL;
count
-------
4533
(1 row)
In both cases, perhaps mentioning these behaviors in the docs would
avoid some confusion.
Thanks!
Best,
--
Jim
On Tue, 2 Jul 2024 at 11:18, Jim Jones <jim.jones@uni-muenster.de> wrote:
When either minval or maxval exceeds int4 the function cannot be
executed/foundSELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
ERROR: function normal_rand_array(integer, integer, integer, bigint)
does not exist
LINE 1: SELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
^
HINT: No function matches the given name and argument types. You might
need to add explicit type casts.
This could be solved by defining separate functions for each supported
type, rather than one function with type anyelement. Internally, they
could continue to share common code, of course.
In some cases the function returns an empty array. Is it also expected?
Perhaps it would be useful to have separate minimum and maximum array
length arguments, rather than a mean array length argument.
Actually, I find the current behaviour somewhat counterintuitive. Only
after reading the source code did I realise what it's actually doing,
which is this:
Row 1: array of random length in range [0, meanarraylen]
Row 2: array of length 2*meanarraylen - length of previous array
Row 3: array of random length in range [0, meanarraylen]
Row 4: array of length 2*meanarraylen - length of previous array
...
That's far from obvious (it's certainly not documented) and I don't
think it's a particularly good way of achieving a specified mean array
length, because only half the lengths are random.
One thing that's definitely needed is more documentation. It should
have its own new subsection, like the other tablefunc functions.
Something else that confused me is why this function is called
normal_rand_array(). The underlying random functions that it's calling
return random values from a uniform distribution, not a normal
distribution. Arrays of normally distributed random values might also
be useful, but that's not what this patch is doing.
Also, the function accepts float8 minval and maxval arguments, and
then simply ignores them and returns random float8 values in the range
[0,1), which is highly counterintuitive.
My suggestion would be to mirror the signatures of the core random()
functions more closely, and have this:
1). rand_array(numvals int, minlen int, maxlen int)
returns setof float8[]
2). rand_array(numvals int, minlen int, maxlen int,
minval int, maxval int)
returns setof int[]
3). rand_array(numvals int, minlen int, maxlen int,
minval bigint, maxval bigint)
returns setof bigint[]
4). rand_array(numvals int, minlen int, maxlen int,
minval numeric, maxval numeric)
returns setof numeric[]
Also, I'd recommend giving the function arguments names in SQL, like
this, since that makes them easier to use.
Something else that's not obvious is that this patch is relying on the
core random functions, which means that it's using the same PRNG state
as those functions. That's probably OK, but it should be documented,
because it's different from tablefunc's normal_rand() function, which
uses pg_global_prng_state. In particular, what this means is that
calling setseed() will affect the output of these new functions, but
not normal_rand(). Incidentally, that makes writing tests much simpler
-- just call setseed() first and the output will be repeatable.
Regards,
Dean
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
My suggestion would be to mirror the signatures of the core random()
functions more closely, and have this:1). rand_array(numvals int, minlen int, maxlen int)
returns setof float8[]2). rand_array(numvals int, minlen int, maxlen int,
minval int, maxval int)
returns setof int[]3). rand_array(numvals int, minlen int, maxlen int,
minval bigint, maxval bigint)
returns setof bigint[]4). rand_array(numvals int, minlen int, maxlen int,
minval numeric, maxval numeric)
returns setof numeric[]
this is indeed a more clean and correct APIs, I will use the above ones
in the next version. Thanks for the suggestion.
It is just not clear to me how verbose the document should to be, and
where the document should be, tablefunc.sgml, the comment above the
function or the places just the code? In many cases you provides above
or below are just implementation details, not the API declaimed purpose.
Something else that's not obvious is that this patch is relying on the
core random functions, which means that it's using the same PRNG state
as those functions. That's probably OK, but it should be documented,
because it's different from tablefunc's normal_rand() function, which
uses pg_global_prng_state.
My above question applies to this comment.
In particular, what this means is that
calling setseed() will affect the output of these new functions, but
not normal_rand(). Incidentally, that makes writing tests much simpler
-- just call setseed() first and the output will be repeatable.
Good to know this user case. for example, should this be documented?
In some cases the function returns an empty array. Is it also expected?
Perhaps it would be useful to have separate minimum and maximum array
length arguments, rather than a mean array length argument.
I'm not sure which one is better, but main user case of this function
for testing pupose, so it I think minimum and maximum array length is
good for me too.
Actually, I find the current behaviour somewhat counterintuitive. Only
after reading the source code did I realise what it's actually doing,
which is this:Row 1: array of random length in range [0, meanarraylen]
Row 2: array of length 2*meanarraylen - length of previous array
Row 3: array of random length in range [0, meanarraylen]
Row 4: array of length 2*meanarraylen - length of previous array
...That's far from obvious (it's certainly not documented) and I don't
think it's a particularly good way of achieving a specified mean array
length, because only half the lengths are random.
I'm not sure how does this matter in real user case.
One thing that's definitely needed is more documentation. It should
have its own new subsection, like the other tablefunc functions.
is the documentaion for the '2*meanarraylen - lastarraylen'?
and What is new subsection, do you mean anything wrong in
'tablefunc.sgml', I did have some issue to run 'make html', but the
error exists before my patch, so I change the document carefully without
testing it. do you know how to fix the below error in 'make html'?
$/usr/bin/xsltproc --nonet --path . --path . --stringparam pg.version '18devel' stylesheet.xsl postgres-full.xml
I/O error : Attempt to load network entity http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl
warning: failed to load external entity "http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl"
compilation error: file stylesheet.xsl line 6 element import
xsl:import : unable to load http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl
I/O error : Attempt to load network entity http://docbook.sourceforge.net/release/xsl/current/common/entities.ent
stylesheet-html-common.xsl:4: warning: failed to load external entity "http://docbook.sourceforge.net/release/xsl/current/common/entities.ent"
%common.entities;
^
stylesheet-html-common.xsl:124: parser error : Entity 'primary' not defined
translate(substring(&primary;, 1, 1),
Something else that confused me is why this function is called
normal_rand_array(). The underlying random functions that it's calling
return random values from a uniform distribution, not a normal
distribution. Arrays of normally distributed random values might also
be useful, but that's not what this patch is doing.
OK, you are right, your new names should be better.
Also, the function accepts float8 minval and maxval arguments, and
then simply ignores them and returns random float8 values in the range
[0,1), which is highly counterintuitive.
This is a obvious bug and it only exists in float8 case IIUC, will fix
it in the next version.
--
Best Regards
Andy Fan
Andy Fan <zhihuifan1213@163.com> writes:
(just noticed this reply is sent to Jim privately, re-sent it to
public.)
Hi Jim,
When either minval or maxval exceeds int4 the function cannot be
executed/foundSELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
ERROR: function normal_rand_array(integer, integer, integer, bigint)
does not exist
LINE 1: SELECT * FROM normal_rand_array(5, 10, 8, 42::bigint);
^
HINT: No function matches the given name and argument types. You might
need to add explicit type casts.
---SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42);
ERROR: function normal_rand_array(integer, integer, bigint, integer)
does not exist
LINE 1: SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42);
^
HINT: No function matches the given name and argument types. You might
need to add explicit type casts.
---However, when both are int8 it works fine:
I defined the function as below:
postgres=# \df normal_rand_array
List of functions
Schema | Name | Result data type | Argument data types | Type
--------+-------------------+------------------+------------------------------------------+------
public | normal_rand_array | SETOF anyarray | integer, integer, anyelement, anyelement | func
(1 row)so it is required that the 3nd and 4th argument should have the same
data type, that's why your first 2 test case failed and the third one
works. and I also think we should not add a test case / document for
this since the behavior of 'anyelement' system.
This issue can be fixed with the new API defined suggested by Dean.
SELECT * FROM normal_rand_array(5, 10, 8::bigint, 42::bigint);
normal_rand_array
--------------------------------------------------
{29,38,31,10,23,39,9,32}
{8,39,19,31,29,15,17,15,36,20,33,19}
{15,18,42,19}
{16,31,33,11,14,20,24,9,12,17,22,42,41,24,11,41}
{15,11,36,8,28,37}
(5 rows)
---Is it the expected behaviour?
Yes, see the above statements.
In some cases the function returns an empty array. Is it also expected?
SELECT count(*)
FROM normal_rand_array(100000, 10, 8, 42) i
WHERE array_length(i,1) IS NULL;count
-------
4533
(1 row)Yes, by design I think it is a feature which could generate [] case
which should be used a special case for testing, and at the
implementation side, the [] means the length is 0 which is caused by I
choose the 'len' by random [0 .. len * 2], so 0 is possible and doesn't
confict with the declared behavior.In both cases, perhaps mentioning these behaviors in the docs would
avoid some confusion.hmm, It doesn't take some big effort to add them, but I'm feeling that
would make the document a bit of too verbose/detailed.Sorry for the late respone!
--
Best Regards
Andy Fan
Import Notes
Reply to msg id not found: 871q46u560.fsf@163.com
On Wed, 17 Jul 2024 at 07:29, Andy Fan <zhihuifan1213@163.com> wrote:
It is just not clear to me how verbose the document should to be, and
where the document should be, tablefunc.sgml, the comment above the
function or the places just the code? In many cases you provides above
or below are just implementation details, not the API declaimed purpose.Something else that's not obvious is that this patch is relying on the
core random functions, which means that it's using the same PRNG state
as those functions. That's probably OK, but it should be documented,
because it's different from tablefunc's normal_rand() function, which
uses pg_global_prng_state.My above question applies to this comment.
One thing that's definitely needed is more documentation. It should
have its own new subsection, like the other tablefunc functions.
I was really referring to the SGML docs. Try to follow the style used
for the existing functions in tablefunc.sgml -- so in addition to
adding the row to the table at the top, also add one or more sections
further down the page to give more details, and example output.
Something like this:
https://www.postgresql.org/docs/current/tablefunc.html#TABLEFUNC-FUNCTIONS-NORMAL-RAND
That would be a good place to mention that setseed() can be used to
produce repeatable results.
I did have some issue to run 'make html', but the
error exists before my patch, so I change the document carefully without
testing it. do you know how to fix the below error in 'make html'?$/usr/bin/xsltproc --nonet --path . --path . --stringparam pg.version '18devel' stylesheet.xsl postgres-full.xml
I/O error : Attempt to load network entity http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl
warning: failed to load external entity "http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl"
compilation error: file stylesheet.xsl line 6 element import
xsl:import : unable to load http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl
I/O error : Attempt to load network entity http://docbook.sourceforge.net/release/xsl/current/common/entities.ent
stylesheet-html-common.xsl:4: warning: failed to load external entity "http://docbook.sourceforge.net/release/xsl/current/common/entities.ent"
%common.entities;
^
stylesheet-html-common.xsl:124: parser error : Entity 'primary' not defined
translate(substring(&primary;, 1, 1),
This looks like you're missing a required package. Try installing
docbook-xsl or docbook-xsl-stylesheets or something similar (the
package name varies depending on your distro).
Regards,
Dean
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
Hello Dean,
I did have some issue to run 'make html', but the
error exists before my patch, so I change the document carefully without
testing it. do you know how to fix the below error in 'make html'?$/usr/bin/xsltproc --nonet --path . --path . --stringparam pg.version '18devel' stylesheet.xsl postgres-full.xml
I/O error : Attempt to load network entity http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl
warning: failed to load external entity
"http://docbook.sourceforge.net/release/xsl/current/xhtml/chunk.xsl"
..
This looks like you're missing a required package. Try installing
docbook-xsl or docbook-xsl-stylesheets or something similar (the
package name varies depending on your distro).
This does work, thank you!
My suggestion would be to mirror the signatures of the core random()
functions more closely, and have this:1). rand_array(numvals int, minlen int, maxlen int)
returns setof float8[]
..>
4). rand_array(numvals int, minlen int, maxlen int,
minval numeric, maxval numeric)
returns setof numeric[]
this is indeed a more clean and correct APIs, I will use the above ones
in the next version. Thanks for the suggestion.
I followed your suggestion in the new attached version. They are not
only some cleaner APIs for user and but also some cleaner implementation
in core, Thank for this suggestion as well.
Sorry for the late response, just my new posistion is bit of busy that I
don't have enough time on community work.
--
Best Regards
Andy Fan
Attachments:
v20240826-0001-Add-functions-rand_array-function-to-contr.patchtext/x-diffDownload
From e6f30ef4fcccee36bc577c5612c3713ae64890f0 Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Mon, 26 Aug 2024 18:50:57 +0800
Subject: [PATCH v20240826 1/1] Add functions rand_array function to
contrib/tablefunc.
It produces an array of numeric_type with its length in the range of
[minlen,maxlen] and each value is in the range of [minval,maxval].
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 105 ++++++++++++++
contrib/tablefunc/sql/tablefunc.sql | 12 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 22 +++
contrib/tablefunc/tablefunc.c | 162 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 11 ++
src/backend/utils/adt/arrayfuncs.c | 7 +
8 files changed, 321 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..586988881e 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,111 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::int, 80::int);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..39e51e8024 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,18 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..973f6aa68c
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,22 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval float8, maxval float8)
+RETURNS setof float8[]
+AS 'MODULE_PATHNAME','rand_array_float8'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..21d36bf101 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,12 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ FunctionCallInfo random_val_fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +277,160 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ Oid random_fn_oid;
+ int num_tuples = PG_GETARG_INT16(0);
+ int minlen = PG_GETARG_INT16(1);
+ int maxlen = PG_GETARG_INT16(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+ rand_array_fctx *fctx;
+
+ if (datatype == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (datatype == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (datatype == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (datatype == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen >= maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be greater than maxlen.")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than zero.")));
+
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /* build the random_len_fcinfo */
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = minlen;
+ random_len_fcinfo->args[1].value = maxlen;
+
+ /* build the random_val_fcinfo */
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[1].value = maxval;
+
+ fctx->random_val_fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, datatype)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_float8);
+Datum
+rand_array_float8(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, FLOAT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..ae71374e91 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,17 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> (<parameter>numvals</parameter> <type>integer</type>,<parameter>minlen</parameter> <type>int4</type>, <parameter>maxlen</parameter> <type>int4</type>,<parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type</replaceable>[]</returnvalue>
+ </para>
+ <para>
+ Produces a set of random numeric_type[], uses the same deterministic pseudo-random number generator as random().
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index e5c7e57a5d..0165d97d45 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3404,6 +3404,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3436,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.1
Andy Fan <zhihuifan1213@163.com> writes:
My suggestion would be to mirror the signatures of the core random()
functions more closely, and have this:1). rand_array(numvals int, minlen int, maxlen int)
returns setof float8[]..>
4). rand_array(numvals int, minlen int, maxlen int,
minval numeric, maxval numeric)
returns setof numeric[]this is indeed a more clean and correct APIs, I will use the above ones
in the next version. Thanks for the suggestion.I followed your suggestion in the new attached version. They are not
only some cleaner APIs for user and but also some cleaner implementation
in core, Thank for this suggestion as well.
A new version is attached, nothing changed except replace
PG_GETARG_INT16 with PG_GETARG_INT32. PG_GETARG_INT16 is a copy-paste
error.
--
Best Regards
Andy Fan
Attachments:
v20240827-0001-Add-functions-rand_array-function-to-contr.patchtext/x-diffDownload
From 12fe2c4ac4c982b861134215c2585df63f47f27b Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Mon, 26 Aug 2024 18:50:57 +0800
Subject: [PATCH v20240827 1/1] Add functions rand_array function to
contrib/tablefunc.
It produces an array of numeric_type with its length in the range of
[minlen,maxlen] and each value is in the range of [minval,maxval].
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 105 ++++++++++++++
contrib/tablefunc/sql/tablefunc.sql | 12 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 22 +++
contrib/tablefunc/tablefunc.c | 161 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 11 ++
src/backend/utils/adt/arrayfuncs.c | 7 +
8 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..586988881e 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,111 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::int, 80::int);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..39e51e8024 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,18 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..973f6aa68c
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,22 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval float8, maxval float8)
+RETURNS setof float8[]
+AS 'MODULE_PATHNAME','rand_array_float8'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..ffab7e5d8d 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,12 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ FunctionCallInfo random_val_fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +277,159 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ Oid random_fn_oid;
+ int num_tuples = PG_GETARG_INT32(0);
+ int minlen = PG_GETARG_INT32(1);
+ int maxlen = PG_GETARG_INT32(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+ rand_array_fctx *fctx;
+
+ if (datatype == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (datatype == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (datatype == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (datatype == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen >= maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be greater than maxlen.")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than zero.")));
+
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /* build the random_len_fcinfo */
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = minlen;
+ random_len_fcinfo->args[1].value = maxlen;
+
+ /* build the random_val_fcinfo */
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[1].value = maxval;
+
+ fctx->random_val_fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, datatype)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_float8);
+Datum
+rand_array_float8(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, FLOAT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..ae71374e91 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,17 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> (<parameter>numvals</parameter> <type>integer</type>,<parameter>minlen</parameter> <type>int4</type>, <parameter>maxlen</parameter> <type>int4</type>,<parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type</replaceable>[]</returnvalue>
+ </para>
+ <para>
+ Produces a set of random numeric_type[], uses the same deterministic pseudo-random number generator as random().
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index e5c7e57a5d..0165d97d45 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3404,6 +3404,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3436,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.1
On Tue, 27 Aug 2024 at 16:43, Andy Fan <zhihuifan1213@163.com> wrote:
Andy Fan <zhihuifan1213@163.com> writes:
My suggestion would be to mirror the signatures of the core random()
functions more closely, and have this:1). rand_array(numvals int, minlen int, maxlen int)
returns setof float8[]..>
4). rand_array(numvals int, minlen int, maxlen int,
minval numeric, maxval numeric)
returns setof numeric[]this is indeed a more clean and correct APIs, I will use the above ones
in the next version. Thanks for the suggestion.I followed your suggestion in the new attached version. They are not
only some cleaner APIs for user and but also some cleaner implementation
in core, Thank for this suggestion as well.A new version is attached, nothing changed except replace
PG_GETARG_INT16 with PG_GETARG_INT32. PG_GETARG_INT16 is a copy-paste
error.
Thanks for updating the patch. Here are some comments.
+ if (minlen >= maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be greater than maxlen.")));
There error message should be "minlen must be smaller than maxlen", right?
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than zero.")));
Here the minlen might be zero, so the error message is incorrect.
How about use "minlen must be greater than or equal to zero"?
--
Regrads,
Japin Li
Japin Li <japinli@hotmail.com> writes:
Thanks for updating the patch. Here are some comments.
+ if (minlen >= maxlen) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("minlen must be greater than maxlen.")));There error message should be "minlen must be smaller than maxlen", right?
+ if (minlen < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("minlen and maxlen must be greater than zero.")));Here the minlen might be zero, so the error message is incorrect.
How about use "minlen must be greater than or equal to zero"?
Yes, you are right. A new version is attached, thanks for checking!
--
Best Regards
Andy Fan
Attachments:
v20240828-0001-Add-functions-rand_array-function-to-contr.patchtext/x-diffDownload
From 292561243f8c77ca2fd905734e793a84c4431b9e Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Mon, 26 Aug 2024 18:50:57 +0800
Subject: [PATCH v20240828 1/1] Add functions rand_array function to
contrib/tablefunc.
It produces an array of numeric_type with its length in the range of
[minlen,maxlen] and each value is in the range of [minval,maxval].
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 105 ++++++++++++++
contrib/tablefunc/sql/tablefunc.sql | 12 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 22 +++
contrib/tablefunc/tablefunc.c | 161 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 11 ++
src/backend/utils/adt/arrayfuncs.c | 7 +
8 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..586988881e 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,111 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::int, 80::int);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..39e51e8024 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,18 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..973f6aa68c
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,22 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval float8, maxval float8)
+RETURNS setof float8[]
+AS 'MODULE_PATHNAME','rand_array_float8'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..b24c70d538 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,12 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ FunctionCallInfo random_val_fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +277,159 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ Oid random_fn_oid;
+ int num_tuples = PG_GETARG_INT32(0);
+ int minlen = PG_GETARG_INT32(1);
+ int maxlen = PG_GETARG_INT32(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+ rand_array_fctx *fctx;
+
+ if (datatype == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (datatype == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (datatype == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (datatype == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen >= maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be smaller than maxlen.")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than or equal to zero.")));
+
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /* build the random_len_fcinfo */
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = minlen;
+ random_len_fcinfo->args[1].value = maxlen;
+
+ /* build the random_val_fcinfo */
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[1].value = maxval;
+
+ fctx->random_val_fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, datatype)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_float8);
+Datum
+rand_array_float8(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, FLOAT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..ae71374e91 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,17 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> (<parameter>numvals</parameter> <type>integer</type>,<parameter>minlen</parameter> <type>int4</type>, <parameter>maxlen</parameter> <type>int4</type>,<parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type</replaceable>[]</returnvalue>
+ </para>
+ <para>
+ Produces a set of random numeric_type[], uses the same deterministic pseudo-random number generator as random().
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index e5c7e57a5d..0165d97d45 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3404,6 +3404,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3436,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.1
On Wed, 28 Aug 2024 at 12:27, Andy Fan <zhihuifan1213@163.com> wrote:
Japin Li <japinli@hotmail.com> writes:
Thanks for updating the patch. Here are some comments.
+ if (minlen >= maxlen) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("minlen must be greater than maxlen.")));There error message should be "minlen must be smaller than maxlen", right?
+ if (minlen < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("minlen and maxlen must be greater than zero.")));Here the minlen might be zero, so the error message is incorrect.
How about use "minlen must be greater than or equal to zero"?Yes, you are right. A new version is attached, thanks for checking!
Nitpick, the minlen is smaller than maxlen, so the maxlen cannot be zero.
After giving it some more thought, it would also be helpful if maxlen is
equal to minlen.
For example, I want have each row has four items, I can use the following
SELECT rand_array(10, 4, 4, 50::int, 80::int);
OTOH, I find the range bound uses "less than or equal to", how about
replacing "smaller" with "less"?
--
Regrads,
Japin Li
Attachments:
less.patchtext/x-diffDownload
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index b24c70d538..69b276b285 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -306,10 +306,10 @@ rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of rows cannot be negative")));
- if (minlen >= maxlen)
+ if (minlen > maxlen)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("minlen must be smaller than maxlen.")));
+ errmsg("minlen must be less than or equal to maxlen.")));
if (minlen < 0)
ereport(ERROR,
Japin Li <japinli@hotmail.com> writes:
On Wed, 28 Aug 2024 at 12:27, Andy Fan <zhihuifan1213@163.com> wrote:
Japin Li <japinli@hotmail.com> writes:
Nitpick, the minlen is smaller than maxlen, so the maxlen cannot be zero.
After giving it some more thought, it would also be helpful if maxlen is
equal to minlen.For example, I want have each row has four items, I can use the following
SELECT rand_array(10, 4, 4, 50::int, 80::int);
Yes, that's a valid usage. the new vesion is attached. I have changed
the the commit entry [1] from "Waiting on Author" to "Needs review".
OTOH, I find the range bound uses "less than or equal to", how about
replacing "smaller" with "less"?
--
Best Regards
Andy Fan
Attachments:
v20240829-0001-Add-functions-rand_array-function-to-contr.patchtext/x-diffDownload
From 7a2f65606b7adb4e62fc3e831ae1bc752820b736 Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Mon, 26 Aug 2024 18:50:57 +0800
Subject: [PATCH v20240829 1/1] Add functions rand_array function to
contrib/tablefunc.
It produces an array of numeric_type with its length in the range of
[minlen,maxlen] and each value is in the range of [minval,maxval].
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 105 ++++++++++++++
contrib/tablefunc/sql/tablefunc.sql | 12 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 22 +++
contrib/tablefunc/tablefunc.c | 161 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 11 ++
src/backend/utils/adt/arrayfuncs.c | 7 +
8 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..586988881e 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,111 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::int, 80::int);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+ rand_array
+----------------------------------------------------------------
+ {0.2328377064354923,0.2557039868740123,0.3469304653544969}
+ {}
+ {0.000835197565928425,0.011969003780824217,0.8981013605381762}
+ {}
+ {0.08527426384290426,0.06616981677920686}
+ {0.1809935627858228,0.9105400064668479,0.34989620485343464}
+ {0.7139255282088908,0.8803672957879083}
+ {}
+ {0.45650965657162845,0.6253950409407578}
+ {0.30472421749069234,0.8833976124588141}
+(10 rows)
+
+select setseed(0.8);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ rand_array
+------------
+ {57,58,61}
+ {}
+ {50,50,78}
+ {}
+ {52,52}
+ {55,79,61}
+ {72,78}
+ {}
+ {64,70}
+ {59,78}
+(10 rows)
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..39e51e8024 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,18 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..973f6aa68c
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,22 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval float8, maxval float8)
+RETURNS setof float8[]
+AS 'MODULE_PATHNAME','rand_array_float8'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..47b26e11fe 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,12 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ FunctionCallInfo random_val_fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +277,159 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ Oid random_fn_oid;
+ int num_tuples = PG_GETARG_INT32(0);
+ int minlen = PG_GETARG_INT32(1);
+ int maxlen = PG_GETARG_INT32(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+ rand_array_fctx *fctx;
+
+ if (datatype == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (datatype == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (datatype == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (datatype == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen > maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must not be greater than maxlen.")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than or equal to zero.")));
+
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /* build the random_len_fcinfo */
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = minlen;
+ random_len_fcinfo->args[1].value = maxlen;
+
+ /* build the random_val_fcinfo */
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[1].value = maxval;
+
+ fctx->random_val_fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, datatype)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_float8);
+Datum
+rand_array_float8(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, FLOAT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..ae71374e91 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,17 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> (<parameter>numvals</parameter> <type>integer</type>,<parameter>minlen</parameter> <type>int4</type>, <parameter>maxlen</parameter> <type>int4</type>,<parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type</replaceable>[]</returnvalue>
+ </para>
+ <para>
+ Produces a set of random numeric_type[], uses the same deterministic pseudo-random number generator as random().
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index e5c7e57a5d..0165d97d45 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3404,6 +3404,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3436,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.1
On Thu, 29 Aug 2024 at 05:39, Andy Fan <zhihuifan1213@163.com> wrote:
Yes, that's a valid usage. the new vesion is attached. I have changed
the the commit entry [1] from "Waiting on Author" to "Needs review".
Note that this needs a rebase, following commit 4681ad4b2f.
Here are a few more review comments:
1). In the tests:
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float8, 80::float8);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::float4, 80::float4);
+select setseed(0.8);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
this should really have a comment block to distinguish these new tests
from the preceeding normal_rand() tests.
2). It's worth also having tests for the error cases.
3). It's only necessary to call setseed() once to get a reproducible
set of results from then on.
4). I'd use setseed(0) or setseed(0.5), since those values are exactly
representable as double precision values, unlike 0.8, ensuring that it
works the same on all platforms. It might not matter, but why take the
risk?
5). The float8 case still has minimum and maximum value arguments that
it just ignores. It should either not take those arguments, or it
should respect them, and try to return float8 values in the requested
range. I suspect that trying to return float8 values in the requested
range would be hard, if not impossible, due to the inexact nature of
double precision arithmetic. That's why I suggested earlier having the
float8 function not take minval/maxval arguments, and just return
values in the range 0 to 1.
6). This new function:
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
should have a comment block. In particular, it should document what
its inputs and outputs are.
7). This code:
+ int num_tuples = PG_GETARG_INT32(0);
+ int minlen = PG_GETARG_INT32(1);
+ int maxlen = PG_GETARG_INT32(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+ rand_array_fctx *fctx;
+
+ if (datatype == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (datatype == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (datatype == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (datatype == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen > maxlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must not be greater than maxlen.")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen and maxlen must be greater than or
equal to zero.")));
should be inside the "if (SRF_IS_FIRSTCALL())" block. There's no point
repeating all those checks for every call.
8). I think it would be neater to code the "if (datatype == INT4OID)
... else if ..." as a switch statement.
9). I would swap the last 2 bound checks round, and simplify the error
messages as follows:
if (minlen < 0)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("minlen must be greater than or equal to zero"));
if (maxlen < minlen)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("maxlen must be greater than or equal to minlen"));
Also note that primary error messages should not end with a period.
See https://www.postgresql.org/docs/current/error-style-guide.html
10). In this error:
+ elog(ERROR, "unsupported type %d for rand_array function.",
+ datatype);
"datatype" is of type Oid, which is unsigned, and so it should use
"%u" not "%d". Also, as above, it should not end with a period, so it
should be:
+ elog(ERROR, "unsupported type %u for rand_array function",
+ datatype);
11). If the float8 case is made to not have minval/maxval arguments, this code:
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
and the FunctionCallInfo setup code needs to be different for the
float8 case, in order to avoid reading and setting undefined
arguments. Perhaps introduce a "need_val_bounds" boolean variable,
based on the datatype.
12). This code:
+ random_len_fcinfo->args[0].value = minlen;
+ random_len_fcinfo->args[1].value = maxlen;
should really be:
+ random_len_fcinfo->args[0].value = Int32GetDatum(minlen);
+ random_len_fcinfo->args[1].value = Int32GetDatum(maxlen);
It amounts to the same thing, but documents the fact that it's
converting an integer to a Datum.
13). These new functions are significantly under-documented,
especially when compared to all the other functions on
https://www.postgresql.org/docs/current/tablefunc.html
They really should have their own subsection, along the same lines as
"F.41.1.1. Normal_rand", explaining what the functions do, what their
arguments mean, and giving a couple of usage examples.
Regards,
Dean
Hello Dean,
Thanks for the detailed feedback! Here is the rebased version.
1). In the tests:
+select setseed(0.8); +select rand_array(10, 0, 3, 50::int, 80::int);
..
this should really have a comment block to distinguish these new tests
from the preceeding normal_rand() tests.
3). It's only necessary to call setseed() once to get a reproducible
set of results from then on.
4). I'd use setseed(0) or setseed(0.5), since those values are exactly
representable as double precision values, unlike 0.8, ensuring that it
works the same on all platforms. It might not matter, but why take the
risk?
All Done, very interesting about the reason why the 0.8 is bad.
2). It's worth also having tests for the error cases.
After the code refactor, looks the ERROR is not reachable, so I added
both Assert(false) and elog(ERROR) with no test case for that.
5). The float8 case still has minimum and maximum value arguments that
it just ignores. It should either not take those arguments, or it
should respect them, and try to return float8 values in the requested
range. I suspect that trying to return float8 values in the requested
range would be hard, if not impossible, due to the inexact nature of
double precision arithmetic. That's why I suggested earlier having the
float8 function not take minval/maxval arguments, and just return
values in the range 0 to 1.11). If the float8 case is made to not have minval/maxval arguments, this code:
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);and the FunctionCallInfo setup code needs to be different for the
float8 case, in order to avoid reading and setting undefined
arguments. Perhaps introduce a "need_val_bounds" boolean variable,
based on the datatype.
I should have noticed the float8 issue after reading your first
reply.. Actually I don't have speical reason for which I have to support
float8. At least when I working on the patch, I want some toast and
non-toast data type, so supporting both int/bigint and numeric should be
good, at least for my purpose. So in the attached version, I removed the
support for float8 for simplification.
6). This new function:
+static Datum +rand_array_internal(FunctionCallInfo fcinfo, Oid datatype) +{should have a comment block. In particular, it should document what
its inputs and outputs are.
Done.
7). This code:
+ int num_tuples = PG_GETARG_INT32(0); + int minlen = PG_GETARG_INT32(1); + int maxlen = PG_GETARG_INT32(2); + Datum minval = PG_GETARG_DATUM(3), + maxval = PG_GETARG_DATUM(4); + rand_array_fctx *fctx; + + if (datatype == INT4OID) + random_fn_oid = F_RANDOM_INT4_INT4; + else if (datatype == INT8OID)
...
should be inside the "if (SRF_IS_FIRSTCALL())" block. There's no point
repeating all those checks for every call.
Done.
8). I think it would be neater to code the "if (datatype == INT4OID)
... else if ..." as a switch statement.
Done.
9). I would swap the last 2 bound checks round, and simplify the error
messages as follows:
Done.
Also note that primary error messages should not end with a period.
See https://www.postgresql.org/docs/current/error-style-guide.html
Thanks for sharing this!
10). In this error:
+ elog(ERROR, "unsupported type %d for rand_array function.", + datatype);"datatype" is of type Oid, which is unsigned, and so it should use
"%u" not "%d". Also, as above, it should not end with a period, so it
should be:+ elog(ERROR, "unsupported type %u for rand_array function", + datatype);
Done.
12). This code:
+ random_len_fcinfo->args[0].value = minlen; + random_len_fcinfo->args[1].value = maxlen;should really be:
+ random_len_fcinfo->args[0].value = Int32GetDatum(minlen); + random_len_fcinfo->args[1].value = Int32GetDatum(maxlen);It amounts to the same thing, but documents the fact that it's
converting an integer to a Datum.
Done.
13). These new functions are significantly under-documented,
especially when compared to all the other functions on
https://www.postgresql.org/docs/current/tablefunc.htmlThey really should have their own subsection, along the same lines as
"F.41.1.1. Normal_rand", explaining what the functions do, what their
arguments mean, and giving a couple of usage examples.
Done, very impresive feedback and I know why PostgreSQL can always have
user friendly documentation now.
--
Best Regards
Andy Fan
Attachments:
v20241016-0001-Add-functions-rand_array-function-to-contr.patchtext/x-diffDownload
From 2024871241c35959a259e54a8151d4e9372dbfbf Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihuifan1213@163.com>
Date: Mon, 26 Aug 2024 18:50:57 +0800
Subject: [PATCH v20241016 1/1] Add functions rand_array function to
contrib/tablefunc.
It produces an array of numeric_type with its length in the range of
[minlen,maxlen] and each value is in the range of [minval,maxval].
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 55 +++++++
contrib/tablefunc/sql/tablefunc.sql | 11 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 17 +++
contrib/tablefunc/tablefunc.c | 168 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 88 +++++++++++-
src/backend/utils/adt/arrayfuncs.c | 1 +
8 files changed, 341 insertions(+), 3 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..2562da6b0f 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,61 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+--
+-- random_array()
+-- produce the result with a stable seed.
+--
+select setseed(0);
+ setseed
+---------
+
+(1 row)
+
+select rand_array(10, 0, 3, 50::int, 80::int);
+ rand_array
+------------
+ {63,71,66}
+ {64}
+ {54}
+ {72,64,60}
+ {75}
+ {53,73}
+ {69}
+ {74,67}
+ {65}
+ {73}
+(10 rows)
+
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ rand_array
+------------
+ {}
+ {59,53}
+ {72}
+ {80,79}
+ {71}
+ {80,80}
+ {61,64}
+ {62,76,80}
+ {}
+ {}
+(10 rows)
+
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ rand_array
+------------
+ {77,66,73}
+ {51,53}
+ {65,54}
+ {72}
+ {55,72}
+ {}
+ {55,70,64}
+ {75}
+ {}
+ {}
+(10 rows)
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..f474c1ab6e 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,17 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+
+--
+-- random_array()
+-- produce the result with a stable seed.
+--
+select setseed(0);
+select rand_array(10, 0, 3, 50::int, 80::int);
+select rand_array(10, 0, 3, 50::bigint, 80::bigint);
+select rand_array(10, 0, 3, 50::numeric, 80::numeric);
+
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..baf9176e98
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,17 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 2a25607a2a..fc7099612d 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,12 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ FunctionCallInfo random_val_fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +277,166 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * rand_array_internal
+ * Fill the requested number of array with random values for the
+ * given range and type into fcinfo.
+ *
+ * Inputs:
+ * fcinfo: includes the number of rows, minlen and maxlen for the array,
+ * minval and maxval for the element in the array.
+ * datatype: the data type for the array element.
+ */
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ rand_array_fctx *fctx;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ int num_tuples = PG_GETARG_INT32(0);
+ int minlen = PG_GETARG_INT32(1);
+ int maxlen = PG_GETARG_INT32(2);
+ Datum minval = PG_GETARG_DATUM(3),
+ maxval = PG_GETARG_DATUM(4);
+
+ MemoryContext oldcontext;
+
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+ Oid random_fn_oid;
+
+ switch(datatype)
+ {
+ case INT4OID:
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ break;
+ case INT8OID:
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ break;
+ case NUMERICOID:
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ break;
+ default:
+ elog(ERROR, "unsupported type %u for rand_array function",
+ datatype);
+ break;
+ }
+
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+
+ if (minlen < 0)
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be greater than or equal to zero"));
+
+ if (maxlen < minlen)
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("maxlen must be greater than or equal to minlen"));
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /* build the random_len_fcinfo */
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = Int32GetDatum(minlen);
+ random_len_fcinfo->args[1].value = Int32GetDatum(maxlen);
+
+ /* build the random_val_fcinfo for the specified data type */
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[1].value = maxval;
+
+ fctx->random_val_fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, datatype)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..6eab69a129 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,17 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> (<parameter>numvals</parameter> <type>integer</type>,<parameter>minlen</parameter> <type>int4</type>, <parameter>maxlen</parameter> <type>int4</type>,<parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type</replaceable>[]</returnvalue>
+ </para>
+ <para>
+ Produces a set of random numeric_type[], uses the same deterministic pseudo-random number generator as random().
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
@@ -849,7 +860,82 @@ SELECT * FROM connectby('connectby_tree', 'keyid', 'parent_keyid', 'pos', 'row2'
(6 rows)
</programlisting>
</para>
- </sect3>
+ </sect3>
+
+ <sect3 id="tablefunc-functions-rand-array">
+ <title><function>rand_array</function></title>
+
+ <indexterm>
+ <primary>rand_array</primary>
+ </indexterm>
+
+ <synopsis>
+ rand_array(int numvals, int minlen, int maxlen, int numeric, int numeric) returns setof numeric
+ </synopsis>
+
+ <para>
+ <function>rand_array</function> produces a set of array with its lenght belongs
+ to [minlen, maxlen] and each element in the array belongs to [minval, maxval].
+ </para>
+
+ <para>
+ <parameter>numvals</parameter> is the number of values to be returned
+ from the function. The array length is controlled in [<parameter>minlen</parameter>,
+ <parameter>maxlen</parameter>], and the value of each element in the array
+ is controlled in [<parameter>minval</parameter>, <parameter>maxval</parameter>]
+ </para>
+
+ <para>
+ For example, this call requests 10 arrays with each array's length is between
+ 3 and 5, and each element in the array is between 2.1 and 3.5. The final value
+ follows the impact of setseed().
+ </para>
+
+ <screen>
+test=# SELECT * FROM rand_array(10, 3, 5, 3.0, 5.0);
+ rand_array
+-----------------------
+ {5.0,3.2,3.6,3.7,4.7}
+ {5.0,4.0,5.0,4.8,4.3}
+ {3.5,4.3,4.3,3.7}
+ {3.8,3.9,4.8,5.0,4.2}
+ {5.0,4.7,4.9}
+ {3.5,5.0,3.5,4.8}
+ {4.9,3.8,3.6,4.2}
+ {4.0,4.5,3.5}
+ {4.2,3.4,3.8,3.8,3.9}
+ {4.4,4.2,4.0,4.1}
+(10 rows)
+ </screen>
+
+ <para>
+ Besides numeric, both int4 and int8 are supported.
+ </para>
+
+ <screen>
+test=# SELECT pg_typeof(rand_array), * FROM rand_array(5, 3, 5, 3::int4, 5.0::int4);
+ pg_typeof | rand_array
+-----------+-------------
+ integer[] | {5,5,5,4,5}
+ integer[] | {4,5,3,4}
+ integer[] | {4,5,4,3}
+ integer[] | {5,4,4,3}
+ integer[] | {3,4,3,3,5}
+(5 rows)
+
+test=# SELECT pg_typeof(rand_array), * FROM rand_array(5, 3, 5, 3::int8, 5.0::int8);
+ pg_typeof | rand_array
+-----------+-------------
+ bigint[] | {3,5,5,4,4}
+ bigint[] | {5,4,5,5}
+ bigint[] | {3,4,3}
+ bigint[] | {5,3,5,5}
+ bigint[] | {5,5,5,4}
+(5 rows)
+ </screen>
+ </sect3>
+
+
</sect2>
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..4255aede2b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3442,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.1
10). In this error:
+ elog(ERROR, "unsupported type %d for rand_array function.", + datatype);"datatype" is of type Oid, which is unsigned, and so it should use
"%u" not "%d". Also, as above, it should not end with a period, so it
should be:+ elog(ERROR, "unsupported type %u for rand_array function", + datatype);
I remember my IDE could detect such issue before, but failed this
time. I just checked more today and it is still failing. Looks the
checker is not stable and I can't find out the reason so far.
main.c
#include <stdio.h>
int main(int argc, char *argv[])
{
unsigned int i = 0;
int x = 2;
printf("i = %d\n", i);
printf("i = %u\n", x);
return 0;
}
All the following commands succeed without any warnings.
clang -O0 -g main.c -o main -Wall -Wformat
gcc -g main.c -o main -Wall -Wformat
scan-build clang -g main.c -o main -Wall -Wformat
cppcheck main.c
clang: 18.1.6
gcc: 13.3.0
Only "cppcheck --enable=all main.c" catch the warnning.
Any hints on this will be appreicated.
--
Best Regards
Andy Fan
On Sat, 26 Oct 2024 at 01:51, Andy Fan <zhihuifan1213@163.com> wrote:
10). In this error:
+ elog(ERROR, "unsupported type %d for rand_array function.", + datatype);"datatype" is of type Oid, which is unsigned, and so it should use
"%u" not "%d". Also, as above, it should not end with a period, so it
should be:+ elog(ERROR, "unsupported type %u for rand_array function", + datatype);All the following commands succeed without any warnings.
clang -O0 -g main.c -o main -Wall -Wformat
gcc -g main.c -o main -Wall -Wformat
This can be detected in gcc with -Wformat plus -Wformat-signedness flags.
I see that this has been discussed before (e.g., [1]/messages/by-id/CA+hUKGJ7EQm9extQAgrFZNNUKqRT8Vv5t1tKqA-5nEcYn0+wNA@mail.gmail.com), but it doesn't
look like those patches were committed, and there are still many such
warnings, if you try compiling postgres with those flags.
[1]: /messages/by-id/CA+hUKGJ7EQm9extQAgrFZNNUKqRT8Vv5t1tKqA-5nEcYn0+wNA@mail.gmail.com
I don't know if anyone has any plans to pick up that work again, but
in any case, it seems wise to not add more.
Regards,
Dean
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
On Sat, 26 Oct 2024 at 01:51, Andy Fan <zhihuifan1213@163.com> wrote:
10). In this error:
+ elog(ERROR, "unsupported type %d for rand_array function.", + datatype);"datatype" is of type Oid, which is unsigned, and so it should use
"%u" not "%d". Also, as above, it should not end with a period, so it
should be:+ elog(ERROR, "unsupported type %u for rand_array function", + datatype);All the following commands succeed without any warnings.
clang -O0 -g main.c -o main -Wall -Wformat
gcc -g main.c -o main -Wall -WformatThis can be detected in gcc with -Wformat plus -Wformat-signedness
flags.
Yes, this one works. I didn't realize we have "-Wformat-signedness"
subsection after we already have "-Wformat". I have added this one into
my toolset.
For recording purpose, clang doesn't support this option until now.
I see that this has been discussed before (e.g., [1]), but it doesn't
look like those patches were committed, and there are still many such
warnings, if you try compiling postgres with those flags.
OK, Thanks for the information. Currently I add the "c/c++-gcc" checker
for my c file, it just trigger when I am writting a file. so the
warnings in other places probably doesn't bother me.
[1]
/messages/by-id/CA+hUKGJ7EQm9extQAgrFZNNUKqRT8Vv5t1tKqA-5nEcYn0+wNA@mail.gmail.comI don't know if anyone has any plans to pick up that work again.
I can take that very soon.
but in any case, it seems wise to not add more.
Very true.
Thank you for all your helps on this series.
--
Best Regards
Andy Fan
On Wed, 16 Oct 2024 at 08:43, Andy Fan <zhihuifan1213@163.com> wrote:
Thanks for the detailed feedback! Here is the rebased version.
I took another look at this and I think it's in reasonable shape.
I'm attaching an update, rebasing it on top of 9be4e5d293.
Also it was missing a required update to the meson.build file --
that's the immediate cause of the other cfbot failures.
The rest is just cosmetic tidying up, fixing indentation, tweaking
comments, and the like. I also hacked on the docs a bit -- the
synopsis only listed one of the new function signatures for some
reason. After fixing that, I think it's sufficient to just list one
usage example.
Regards,
Dean
Attachments:
v3-0001-tablefunc-Add-rand_array-functions.patchtext/x-patch; charset=US-ASCII; name=v3-0001-tablefunc-Add-rand_array-functions.patchDownload
From 2a14be071dd2e721e768fdbaa57b096d509773aa Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rasheed@gmail.com>
Date: Wed, 30 Oct 2024 08:41:41 +0000
Subject: [PATCH v3] tablefunc: Add rand_array() functions.
These functions return sets of random-length arrays, containing
uniformly distributed random values of type integer, bigint, or
numeric.
Andy Fan, reviewed by Jim Jones, Japin Li, and Dean Rasheed.
Discussion: https://postgr.es/m/87plssezpc.fsf@163.com
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 66 ++++++++
contrib/tablefunc/meson.build | 1 +
contrib/tablefunc/sql/tablefunc.sql | 16 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 19 +++
contrib/tablefunc/tablefunc.c | 197 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 66 ++++++++
src/backend/utils/adt/arrayfuncs.c | 1 +
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 369 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..9c3f7529d5 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -13,6 +13,72 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
--
+-- rand_array()
+-- use setseed() to get stable results
+--
+SELECT setseed(0);
+ setseed
+---------
+
+(1 row)
+
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::int, 80::int);
+ pg_typeof | rand_array
+-----------+------------
+ integer[] | {63,71,66}
+ integer[] | {64}
+ integer[] | {54}
+ integer[] | {72,64,60}
+ integer[] | {75}
+ integer[] | {53,73}
+ integer[] | {69}
+ integer[] | {74,67}
+ integer[] | {65}
+ integer[] | {73}
+(10 rows)
+
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::bigint, 80::bigint);
+ pg_typeof | rand_array
+-----------+------------
+ bigint[] | {}
+ bigint[] | {59,53}
+ bigint[] | {72}
+ bigint[] | {80,79}
+ bigint[] | {71}
+ bigint[] | {80,80}
+ bigint[] | {61,64}
+ bigint[] | {62,76,80}
+ bigint[] | {}
+ bigint[] | {}
+(10 rows)
+
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::numeric, 80::numeric);
+ pg_typeof | rand_array
+-----------+------------
+ numeric[] | {77,66,73}
+ numeric[] | {51,53}
+ numeric[] | {65,54}
+ numeric[] | {72}
+ numeric[] | {55,72}
+ numeric[] | {}
+ numeric[] | {55,70,64}
+ numeric[] | {75}
+ numeric[] | {}
+ numeric[] | {}
+(10 rows)
+
+-- negative number of tuples
+SELECT pg_typeof(rand_array), * FROM rand_array(-1, 0, 3, 50::int, 80::int);
+ERROR: number of rows cannot be negative
+-- invalid length bounds
+SELECT pg_typeof(rand_array), * FROM rand_array(10, -1, 3, 50::int, 80::int);
+ERROR: minlen must be greater than or equal to zero
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 3, 0, 50::int, 80::int);
+ERROR: maxlen must be greater than or equal to minlen
+-- invalid value bounds
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 80::int, 50::int);
+ERROR: lower bound must be less than or equal to upper bound
+--
-- crosstab()
--
CREATE TABLE ct(id int, rowclass text, rowid text, attribute text, val text);
diff --git a/contrib/tablefunc/meson.build b/contrib/tablefunc/meson.build
index dccf3b3758..f794163ecc 100644
--- a/contrib/tablefunc/meson.build
+++ b/contrib/tablefunc/meson.build
@@ -18,6 +18,7 @@ contrib_targets += tablefunc
install_data(
'tablefunc--1.0.sql',
+ 'tablefunc--1.0--1.1.sql',
'tablefunc.control',
kwargs: contrib_data_args,
)
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..f83ccb7ec4 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,22 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+--
+-- rand_array()
+-- use setseed() to get stable results
+--
+SELECT setseed(0);
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::int, 80::int);
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::bigint, 80::bigint);
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 50::numeric, 80::numeric);
+-- negative number of tuples
+SELECT pg_typeof(rand_array), * FROM rand_array(-1, 0, 3, 50::int, 80::int);
+-- invalid length bounds
+SELECT pg_typeof(rand_array), * FROM rand_array(10, -1, 3, 50::int, 80::int);
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 3, 0, 50::int, 80::int);
+-- invalid value bounds
+SELECT pg_typeof(rand_array), * FROM rand_array(10, 0, 3, 80::int, 50::int);
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..18e4aed826
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,19 @@
+/* contrib/tablefunc/tablefunc--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval int, maxval int)
+RETURNS setof int[]
+AS 'MODULE_PATHNAME','rand_array_int'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval bigint, maxval bigint)
+RETURNS setof bigint[]
+AS 'MODULE_PATHNAME','rand_array_bigint'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE FUNCTION rand_array(numvals int, minlen int, maxlen int, minval numeric, maxval numeric)
+RETURNS setof numeric[]
+AS 'MODULE_PATHNAME','rand_array_numeric'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 4b6ae2fe5c..ff5670d122 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -83,6 +85,9 @@ static void build_tuplestore_recursively(char *key_fld,
AttInMetadata *attinmeta,
Tuplestorestate *tupstore);
+/*
+ * cross-call data structure for SRF normal_rand()
+ */
typedef struct
{
float8 mean; /* mean of the distribution */
@@ -91,6 +96,15 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+/*
+ * cross-call data structure for SRF rand_array()
+ */
+typedef struct
+{
+ FunctionCallInfo random_len_fcinfo; /* random array length function */
+ FunctionCallInfo random_val_fcinfo; /* random array elem value function */
+} rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -313,6 +327,189 @@ get_normal_pair(float8 *x1, float8 *x2)
}
}
+/*
+ * rand_array_internal()
+ * Return the requested number of random-length arrays, filled with
+ * random values of the specified datatype.
+ *
+ * Inputs:
+ * fcinfo: includes the number of arrays to return, minlen and maxlen array
+ * length bounds, and minval and maxval array element bounds.
+ * datatype: the datatype of the array elements.
+ *
+ * returns setof datatype[].
+ */
+static Datum
+rand_array_internal(FunctionCallInfo fcinfo, Oid datatype)
+{
+ FuncCallContext *funcctx;
+ rand_array_fctx *fctx;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ int32 num_tuples;
+ int32 minlen;
+ int32 maxlen;
+ Datum minval;
+ Datum maxval;
+ Oid random_fn_oid;
+ FmgrInfo *random_len_flinfo;
+ FunctionCallInfo random_len_fcinfo;
+ FmgrInfo *random_val_flinfo;
+ FunctionCallInfo random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* total number of tuples (arrays) to be returned */
+ num_tuples = PG_GETARG_INT32(0);
+ if (num_tuples < 0)
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative"));
+ funcctx->max_calls = num_tuples;
+
+ /* minimum length of arrays returned */
+ minlen = PG_GETARG_INT32(1);
+ if (minlen < 0)
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("minlen must be greater than or equal to zero"));
+
+ /* maximum length of arrays returned */
+ maxlen = PG_GETARG_INT32(2);
+ if (maxlen < minlen)
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("maxlen must be greater than or equal to minlen"));
+
+ /* minimum value of array elements */
+ minval = PG_GETARG_DATUM(3);
+
+ /* maximum value of array elements */
+ maxval = PG_GETARG_DATUM(4);
+
+ /* function to return each array element */
+ switch (datatype)
+ {
+ case INT4OID:
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ break;
+ case INT8OID:
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ break;
+ case NUMERICOID:
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ break;
+ default:
+ elog(ERROR, "unsupported type %u for rand_array function",
+ datatype);
+ random_fn_oid = 0; /* keep compiler quiet */
+ break;
+ }
+
+ /* allocate memory for user context */
+ fctx = (rand_array_fctx *) palloc(sizeof(rand_array_fctx));
+
+ /*
+ * Use fctx to keep track of upper and lower array length bounds and
+ * upper and lower array element value bounds from call to call. These
+ * bounds are held in the function call info for the array length and
+ * array element functions.
+ */
+ /* array length function: random(int, int) */
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].value = Int32GetDatum(minlen);
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].value = Int32GetDatum(maxlen);
+ random_len_fcinfo->args[1].isnull = false;
+
+ /* array element function: random(minval, maxval) for datatype */
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].value = minval;
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].value = maxval;
+ random_val_fcinfo->args[1].isnull = false;
+
+ /* store in SRF user context */
+ fctx->random_len_fcinfo = random_len_fcinfo;
+ fctx->random_val_fcinfo = random_val_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (funcctx->call_cntr < funcctx->max_calls)
+ {
+ int array_len;
+ Datum *array_elems;
+ ArrayType *array;
+
+ /*
+ * Return a(nother) random-length array, filled with random values.
+ */
+ array_len = DatumGetInt32(FunctionCallInvoke(fctx->random_len_fcinfo));
+
+ array_elems = palloc(array_len * sizeof(Datum));
+
+ for (int i = 0; i < array_len; i++)
+ array_elems[i] = FunctionCallInvoke(fctx->random_val_fcinfo);
+
+ array = construct_array_builtin(array_elems, array_len, datatype);
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(array));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+PG_FUNCTION_INFO_V1(rand_array_int);
+Datum
+rand_array_int(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT4OID);
+}
+
+PG_FUNCTION_INFO_V1(rand_array_bigint);
+Datum
+rand_array_bigint(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, INT8OID);
+}
+
+PG_FUNCTION_INFO_V1(rand_array_numeric);
+Datum
+rand_array_numeric(PG_FUNCTION_ARGS)
+{
+ return rand_array_internal(fcinfo, NUMERICOID);
+}
+
/*
* crosstab - create a crosstab of rowids and values columns from a
* SQL statement returning one rowid column, one category column,
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..a06ba10e20 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,21 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>rand_array</function> ( <parameter>numvals</parameter> <type>integer</type>, <parameter>minlen</parameter> <type>integer</type>, <parameter>maxlen</parameter> <type>integer</type>, <parameter>minval</parameter> <type><replaceable>numeric_type</replaceable></type>, <parameter>maxval</parameter> <type><replaceable>numeric_type</replaceable></type> )
+ <returnvalue>setof <replaceable>numeric_type[]</replaceable></returnvalue>
+ </para>
+ <para>
+ Produces a set of random-length <replaceable>numeric_type[]</replaceable>
+ arrays of uniformly distributed random values, using the
+ <function>random()</function> functions listed in
+ <xref linkend="functions-math-random-table"/>; available for
+ <type>integer</type>, <type>bigint</type>, and <type>numeric</type>.
+ </para>
+ </entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
@@ -166,6 +181,57 @@ test=# SELECT * FROM normal_rand(1000, 5, 3);
</screen>
</sect3>
+ <sect3 id="tablefunc-functions-rand-array">
+ <title><function>rand_array</function></title>
+
+ <indexterm>
+ <primary>rand_array</primary>
+ </indexterm>
+
+<synopsis>
+rand_array(int numvals, int minlen, int maxlen, int minval, int maxval) returns setof int[]
+rand_array(int numvals, int minlen, int maxlen, bigint minval, bigint maxval) returns setof bigint[]
+rand_array(int numvals, int minlen, int maxlen, numeric minval, numeric maxval) returns setof numeric[]
+</synopsis>
+
+ <para>
+ <function>rand_array</function> produces a set of random-length arrays of
+ uniformly distributed random values.
+ </para>
+
+ <para>
+ <parameter>numvals</parameter> is the number of arrays to be returned
+ by the function. <parameter>minlen</parameter> and
+ <parameter>maxlen</parameter> are the minumum and maximum lengths of each
+ array, and <parameter>minval</parameter> and <parameter>maxval</parameter>
+ are the minumum and maximum values of each array element.
+ </para>
+
+ <para>
+ For example, this call requests 1000 arrays with lengths between 0 and 4
+ and elements of type <type>numeric</type> with values between
+ <literal>-1.0</literal> and <literal>1.0</literal>:
+ </para>
+
+<screen>
+test=# SELECT * FROM rand_array(1000, 0, 4, -1.0, 1.0);
+ rand_array
+-----------------------
+ {1.0,-1.0,-0.7}
+ {1.0}
+ {-0.6,-0.7}
+ {-0.9,-0.9,-0.6,0.5}
+ {}
+ .
+ .
+ .
+ {0.0,-0.8}
+ {}
+ {-0.9,-0.9}
+(1000 rows)
+</screen>
+ </sect3>
+
<sect3 id="tablefunc-functions-crosstab-text">
<title><function>crosstab(text)</function></title>
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..4255aede2b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3442,6 +3442,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 171a7dd5d2..cf1d1757d4 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3834,6 +3834,7 @@ qsort_comparator
query_pathkeys_callback
radius_attribute
radius_packet
+rand_array_fctx
rangeTableEntry_used_context
rank_context
rbt_allocfunc
--
2.43.0
On Fri, 01 Nov 2024 at 09:33, Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
On Wed, 16 Oct 2024 at 08:43, Andy Fan <zhihuifan1213@163.com> wrote:
Thanks for the detailed feedback! Here is the rebased version.
I took another look at this and I think it's in reasonable shape.
I'm attaching an update, rebasing it on top of 9be4e5d293.
Also it was missing a required update to the meson.build file --
that's the immediate cause of the other cfbot failures.The rest is just cosmetic tidying up, fixing indentation, tweaking
comments, and the like. I also hacked on the docs a bit -- the
synopsis only listed one of the new function signatures for some
reason. After fixing that, I think it's sufficient to just list one
usage example.
LGTM expect there is a warning when applying the patch.
Applying: tablefunc: Add rand_array() functions.
.git/rebase-apply/patch:475: trailing whitespace.
rand_array
warning: 1 line adds whitespace errors.
The other looks good to me.
--
Regrads,
Japin Li
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
On Wed, 16 Oct 2024 at 08:43, Andy Fan <zhihuifan1213@163.com> wrote:
Thanks for the detailed feedback! Here is the rebased version.
I took another look at this and I think it's in reasonable shape.
I'm attaching an update, rebasing it on top of 9be4e5d293.
Thank you Dean.
Also it was missing a required update to the meson.build file --
that's the immediate cause of the other cfbot failures.
OK, great to know this one, I should have paid more attention to cfbot.
The rest is just cosmetic tidying up, fixing indentation, tweaking
comments, and the like. I also hacked on the docs a bit -- the
synopsis only listed one of the new function signatures for some
reason. After fixing that, I think it's sufficient to just list one
usage example.
That looks better now, more concise and expressive sentences.
--
Best Regards
Andy Fan
Hi everyone,
Thanks for the detailed feedback! Here is the rebased version.
I took another look at this and I think it's in reasonable shape.
I'm attaching an update, rebasing it on top of 9be4e5d293.
Thank you Dean.
Also it was missing a required update to the meson.build file --
that's the immediate cause of the other cfbot failures.OK, great to know this one, I should have paid more attention to cfbot.
The rest is just cosmetic tidying up, fixing indentation, tweaking
comments, and the like. I also hacked on the docs a bit -- the
synopsis only listed one of the new function signatures for some
reason. After fixing that, I think it's sufficient to just list one
usage example.That looks better now, more concise and expressive sentences.
I played with patch v3. All in all it seems to be in good shape.
I wonder though whether tablefunc extension is the right place for the
function. To me it seems to be as useful as array_shuffle().
Personally I would name the function array_rand() in order to be
consistent with the rest of array_* functions [1]https://www.postgresql.org/docs/current/functions-array.html.
I would also replace `minlen` and `maxlen` arguments with a single
`len` argument because the same result (although more slowly) can be
achieved like this:
SELECT trim_array(arr, random(0,3)) FROM rand_array(10, 3, 3, 50::int,
80::int) as arr;
This could be just a bike-shedding though. Does anyone feel necessary
to second any of these nitpicks?
[1]: https://www.postgresql.org/docs/current/functions-array.html
--
Best regards,
Aleksander Alekseev
Hi,
I played with patch v3. All in all it seems to be in good shape.
I wonder though whether tablefunc extension is the right place for the
function. To me it seems to be as useful as array_shuffle().Personally I would name the function array_rand() in order to be
consistent with the rest of array_* functions [1].I would also replace `minlen` and `maxlen` arguments with a single
`len` argument because the same result (although more slowly) can be
achieved like this:SELECT trim_array(arr, random(0,3)) FROM rand_array(10, 3, 3, 50::int,
80::int) as arr;This could be just a bike-shedding though. Does anyone feel necessary
to second any of these nitpicks?[1]: https://www.postgresql.org/docs/current/functions-array.html
On second thought, is there really much value in returning a setof?
Any reason not to have an interface as simple and straightforward as
this:
```
=# SELECT array_random(1, 10, random(0, 3)) FROM generate_series( ... )
{5}
{1, 3, 8}
{7, 6}
...
```
? Or maybe I'm missing something?
--
Best regards,
Aleksander Alekseev
On Mon, 4 Nov 2024 at 14:46, Aleksander Alekseev
<aleksander@timescale.com> wrote:
Any reason not to have an interface as simple and straightforward as
this:=# SELECT array_random(1, 10, random(0, 3)) FROM generate_series( ... )
{5}
{1, 3, 8}
{7, 6}
...
Yeah, that looks like a neater API.
Something that bothers me somewhat is that it's completely trivial for
the user to write such a function for themselves, so is it really
useful enough to include in core?
The other question is whether it's an array function or a random
function. I.e., should it be listed in "Table 9.55. Array Functions",
in which case the name array_random() makes sense, or should it be
listed in "Table 9.6. Random Functions", in which case it should
probably be called random_array(). I think the latter makes more
sense, since it's a function that generates random values, more
similar to the random(min, max) functions. Also I think it's more
useful if it shares the same PRNG, controlled by setseed(), and it
makes sense to group all such functions together.
Regards,
Dean
Hi Dean,
Thanks for your input.
Any reason not to have an interface as simple and straightforward as
this:=# SELECT array_random(1, 10, random(0, 3)) FROM generate_series( ... )
{5}
{1, 3, 8}
{7, 6}
...Yeah, that looks like a neater API.
Something that bothers me somewhat is that it's completely trivial for
the user to write such a function for themselves, so is it really
useful enough to include in core?
I think it would be useful. Many users don't bother writing C
extensions for tasks like this. So at least our implementation is
going to be faster.
The other question is whether it's an array function or a random
function. I.e., should it be listed in "Table 9.55. Array Functions",
in which case the name array_random() makes sense, or should it be
listed in "Table 9.6. Random Functions", in which case it should
probably be called random_array(). I think the latter makes more
sense, since it's a function that generates random values, more
similar to the random(min, max) functions. Also I think it's more
useful if it shares the same PRNG, controlled by setseed(), and it
makes sense to group all such functions together.
Good point. Personally I don't have a strong opinion on whether
random_array() or array_random() is preferable, for me either option
is OK. Perhaps we should cross-reference the function between two
sections of the documentation to make it easier to find.
--
Best regards,
Aleksander Alekseev
On Tue, 5 Nov 2024 at 15:23, Aleksander Alekseev
<aleksander@timescale.com> wrote:
=# SELECT array_random(1, 10, random(0, 3)) FROM generate_series( ... )
{5}
{1, 3, 8}
{7, 6}
...Yeah, that looks like a neater API.
Something that bothers me somewhat is that it's completely trivial for
the user to write such a function for themselves, so is it really
useful enough to include in core?I think it would be useful. Many users don't bother writing C
extensions for tasks like this. So at least our implementation is
going to be faster.
If we are going to add such a function to core, then I think we should
make it consistent and at least as flexible as the other array
functions, and support multi-dimensional arrays with optional
non-default lower-bounds, like array_fill(). I.e., something like:
random_array(min int, max int, dims int[] [, lbounds int[]]) -> int[]
Returns an array filled with random values in the range min <= x <= max,
having dimensions of the lengths specified by dims. The optional lbounds
argument supplies lower-bound values for each dimension (which default
to all 1).
Regards,
Dean
Hi,
If we are going to add such a function to core, then I think we should
make it consistent and at least as flexible as the other array
functions, and support multi-dimensional arrays with optional
non-default lower-bounds, like array_fill(). I.e., something like:random_array(min int, max int, dims int[] [, lbounds int[]]) -> int[]
Returns an array filled with random values in the range min <= x <= max,
having dimensions of the lengths specified by dims. The optional lbounds
argument supplies lower-bound values for each dimension (which default
to all 1).
FWIW we have several array_* functions that deal only with the first
dimension of an array: array_shuffle(), array_sample() the recently
added array_reverse() [1]https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=49d6c7d8daba, the currently discussed array_sort() [2]https://commitfest.postgresql.org/50/5277/.
I suggest implementing the function in several steps. First implement
random_array(min, max, len). It's straightforward and IMO will provide
the most value to the majority of the users.Then we can either add an
optional argument or a random_array_multidim() function. This can be
implemented and discussed as a separate patch. This approach would be
convenient for the author and the reviewers and also will allow us to
deliver value to the users sooner.
[1]: https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=49d6c7d8daba
[2]: https://commitfest.postgresql.org/50/5277/
--
Best regards,
Aleksander Alekseev
Hi,
Looks I miss some interesting dicussions in the recent days, the
pretty neat API random_array, random_array or array_random (I prefer the
random_array because of the setseed stuff as Dean said). These
dicussions absoluatly enrichs my API / decoument design experience.
I'm still not sure if it should be put into core, I agree that
random_array/array_random should be put into core if we go with this
way. It looks to me that the cost of putting them into core is just
it takes more OIDs in initdb, hence making catalog a bit of bigger, not
sure the real matter of it.
As for if we should support random_array_multidim at the first step, as
a developer, I would like to try dim sutff since I have little
experience on this area, It is possible that this area bring some
other interesting experience for me as the things happen to me recently.
I suggest implementing the function in several steps. First implement
random_array(min, max, len). It's straightforward and IMO will provide
the most value to the majority of the users.Then we can either add an
optional argument or a random_array_multidim() function.
This may doubles the OID expence, every invariants needs 3 OIDs(int, bigint,
numeric).
This can be
implemented and discussed as a separate patch. This approach would be
convenient for the author and the reviewers and also will allow us to
deliver value to the users sooner.
Appricated for your suggestion like this, Aleksander, I can understand
your kindly intention!
If no new ideas, I will defer to Dean's final decision for efficient
purpose.
--
Best Regards
Andy Fan
On Wed, Nov 6, 2024 at 12:20 AM Dean Rasheed <dean.a.rasheed@gmail.com> wrote:
On Tue, 5 Nov 2024 at 15:23, Aleksander Alekseev
<aleksander@timescale.com> wrote:=# SELECT array_random(1, 10, random(0, 3)) FROM generate_series( ... )
{5}
{1, 3, 8}
{7, 6}
...Yeah, that looks like a neater API.
Something that bothers me somewhat is that it's completely trivial for
the user to write such a function for themselves, so is it really
useful enough to include in core?I think it would be useful. Many users don't bother writing C
extensions for tasks like this. So at least our implementation is
going to be faster.
it's fine to add such a function, IMHO.
we already have array_fill, array_fill fill the same value repeatedly,
here we can fill an array with random value. So this is better than array_fill.
The most trivial query I can think of is
select array_agg((array[random(1, 10), random(1,10), random(1, 10)]));
but this can only be up to two dimensional.
Users wishing to write such functions with several dimensions can be tricky.
If we are going to add such a function to core, then I think we should
make it consistent and at least as flexible as the other array
functions, and support multi-dimensional arrays with optional
non-default lower-bounds, like array_fill(). I.e., something like:random_array(min int, max int, dims int[] [, lbounds int[]]) -> int[]
Returns an array filled with random values in the range min <= x <= max,
having dimensions of the lengths specified by dims. The optional lbounds
argument supplies lower-bound values for each dimension (which default
to all 1).
please try the attached.
instead of
random_array(min int, max int, dims int[] [, lbounds int[]]) -> int[]
i choose
array_random(min int, max int, dims int[] [, lbounds int[]]) -> int[]
I placed it in src/backend/utils/adt/arrayfuncs.c, naming it as array_random
to align with the surrounding function name convention.
it can be changed.
Another issue is
we can not use function signature as array_random(anyelement,
anyelement, int[] [, int[]])
because currently, we cannot resolve type conflict for cases like
array_random(1, 2::bigint).
In this case, the first argument should be promoted to bigint.
For example:
create or replace function polyf(x anyelement, y anyelement) returns
anyelement as $$
select x + 1
$$ language sql;
select polyf(1, 2::bigint);
ERROR: function polyf(integer, bigint) does not exist
select polyf(1::bigint, 2);
ERROR: function polyf(bigint, integer) does not exist
so i invented 3 functions, similar to what we did with random().
now it looks like:
\df array_random
List of functions
Schema | Name | Result data type |
Argument data types | Type
------------+--------------+------------------+-------------------------------------------------------------------------------------+------
pg_catalog | array_random | bigint[] | min bigint, max
bigint, dims integer[], lbounds integer[] DEFAULT NULL::integer[] |
func
pg_catalog | array_random | integer[] | min integer, max
integer, dims integer[], lbounds integer[] DEFAULT NULL::integer[] |
func
pg_catalog | array_random | numeric[] | min numeric, max
numeric, dims integer[], lbounds integer[] DEFAULT NULL::integer[] |
func
doc is not there yet.
Attachments:
v1-0001-array_random.patchtext/x-patch; charset=US-ASCII; name=v1-0001-array_random.patchDownload
From 5c1e6a3e5f4764935c60d05e116692fdea422b5a Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Mon, 23 Jun 2025 13:38:33 +0800
Subject: [PATCH v1 1/1] array_random
we can not use function signature as array_random(anyelement, anyelement, int[] [, int[]])
because currently, we cannot resolve the conflict for array_random(1, 2::bigint). In
this case, the first argument should be promoted to bigint.
For example:
create or replace function polyf(x anyelement, y anyelement) returns anyelement as $$
select x + 1
$$ language sql;
select polyf(1, 2::bigint);
ERROR: function polyf(integer, bigint) does not exist
select polyf(1::bigint, 2);
ERROR: function polyf(bigint, integer) does not exist
So, we define three separate functions for array_random, similar to the approach
used for the random() function.
now it looks like:
\df array_random
List of functions
Schema | Name | Result data type | Argument data types | Type
------------+--------------+------------------+-------------------------------------------------------------------------------------+------
pg_catalog | array_random | bigint[] | min bigint, max bigint, dims integer[], lbounds integer[] DEFAULT NULL::integer[] | func
pg_catalog | array_random | integer[] | min integer, max integer, dims integer[], lbounds integer[] DEFAULT NULL::integer[] | func
pg_catalog | array_random | numeric[] | min numeric, max numeric, dims integer[], lbounds integer[] DEFAULT NULL::integer[] | func
(3 rows)
doc is missing.
---
src/backend/catalog/system_functions.sql | 21 ++
src/backend/utils/adt/arrayfuncs.c | 255 +++++++++++++++++++++++
src/include/catalog/pg_proc.dat | 12 ++
src/test/regress/expected/arrays.out | 85 ++++++++
src/test/regress/sql/arrays.sql | 26 +++
5 files changed, 399 insertions(+)
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308e443..b3819a0cbdd 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -73,6 +73,27 @@ CREATE OR REPLACE FUNCTION
VOLATILE PARALLEL RESTRICTED STRICT COST 1
AS 'drandom_normal';
+CREATE OR REPLACE FUNCTION
+ array_random(min integer, max integer, dims integer[], lbounds integer[] DEFAULT NULL)
+ RETURNS integer[]
+ LANGUAGE internal
+ VOLATILE PARALLEL RESTRICTED COST 1
+AS 'int4array_random';
+
+CREATE OR REPLACE FUNCTION
+ array_random (min bigint, max bigint, dims integer[], lbounds integer[] DEFAULT NULL)
+ RETURNS bigint[]
+ LANGUAGE internal
+ VOLATILE PARALLEL RESTRICTED COST 1
+AS 'int8array_random';
+
+CREATE OR REPLACE FUNCTION
+ array_random (min numeric, max numeric, dims integer[], lbounds integer[] DEFAULT NULL)
+ RETURNS numeric[]
+ LANGUAGE internal
+ VOLATILE PARALLEL RESTRICTED COST 1
+AS 'numeric_array_random';
+
CREATE OR REPLACE FUNCTION log(numeric)
RETURNS numeric
LANGUAGE sql
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index c8f53c6fbe7..1924c96388f 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -166,6 +166,7 @@ static int width_bucket_array_variable(Datum operand,
Oid collation,
TypeCacheEntry *typentry);
+static Datum array_random_internal(FunctionCallInfo fcinfo, Oid elmtype);
/*
* array_in :
@@ -6064,6 +6065,260 @@ array_fill(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(result);
}
+Datum
+int4array_random(PG_FUNCTION_ARGS)
+{
+ return array_random_internal(fcinfo, INT4OID);
+}
+
+Datum
+int8array_random(PG_FUNCTION_ARGS)
+{
+ return array_random_internal(fcinfo, INT8OID);
+}
+
+Datum
+numeric_array_random(PG_FUNCTION_ARGS)
+{
+ return array_random_internal(fcinfo, NUMERICOID);
+}
+
+/*
+ * array_random_internal
+ * Create and populate array random value with default lower bounds.
+ *
+ * main internal function for
+ * array_random(min int4, max int4, dims int[] [, lbounds int[]]) -> int[]
+ * array_random(min int8, max int8, dims int[] [, lbounds int[]]) -> int8[]
+ * array_random(min numeric, max numeric, dims int[] [, lbounds int[]]) -> numeric[]
+ */
+static Datum
+array_random_internal(FunctionCallInfo fcinfo, Oid elmtype)
+{
+ ArrayType *result;
+ ArrayType *dims;
+ ArrayType *lbs;
+ int *dimv;
+ int *lbsv;
+ int ndims;
+ int nitems;
+ int deflbs[MAXDIM];
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ Datum *values;
+ bool *nulls;
+ Oid random_fn_oid = InvalidOid;
+
+ values = (Datum *) palloc(2 * sizeof(Datum));
+ nulls = (bool *) palloc(2 * sizeof(bool));
+ if (!PG_ARGISNULL(0))
+ {
+ values[0] = PG_GETARG_DATUM(0);
+ nulls[0] = false;
+ }
+ else
+ {
+ values[0] = 0;
+ nulls[0] = true;
+ }
+
+ if (!PG_ARGISNULL(1))
+ {
+ values[1] = PG_GETARG_DATUM(1);
+ nulls[1] = false;
+ }
+ else
+ {
+ values[1] = 0;
+ nulls[1] = true;
+ }
+
+ if (PG_ARGISNULL(2))
+ ereport(ERROR,
+ errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension array cannot be null"));
+
+ dims = PG_GETARG_ARRAYTYPE_P(2);
+ if (ARR_NDIM(dims) > 1)
+ ereport(ERROR,
+ errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Dimension array must be one dimensional."));
+
+ if (array_contains_nulls(dims))
+ ereport(ERROR,
+ errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension values cannot be null"));
+
+ dimv = (int *) ARR_DATA_PTR(dims);
+ ndims = (ARR_NDIM(dims) > 0) ? ARR_DIMS(dims)[0] : 0;
+
+ if (ndims < 0) /* we do allow zero-dimension arrays */
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid number of dimensions: %d", ndims));
+ if (ndims > MAXDIM)
+ ereport(ERROR,
+ errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndims, MAXDIM));
+
+ if (!PG_ARGISNULL(3))
+ {
+ lbs = PG_GETARG_ARRAYTYPE_P(3);
+
+ if (ARR_NDIM(lbs) > 1)
+ ereport(ERROR,
+ errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Low bound array must be one dimensional."));
+
+ if (array_contains_nulls(lbs))
+ ereport(ERROR,
+ errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("Low bound array can not contain null values"));
+
+ if (ndims != ((ARR_NDIM(lbs) > 0) ? ARR_DIMS(lbs)[0] : 0))
+ ereport(ERROR,
+ errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Low bound array has different size than dimensions array."));
+
+ lbsv = (int *) ARR_DATA_PTR(lbs);
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < MAXDIM; i++)
+ deflbs[i] = 1;
+
+ lbsv = deflbs;
+ }
+
+ /* random function for generating each array element */
+ switch (elmtype)
+ {
+ case INT4OID:
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ break;
+ case INT8OID:
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ break;
+ case NUMERICOID:
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ break;
+ default:
+ /* unlikely to happen */
+ elog(ERROR, "unsupported type %u for array_random function", elmtype);
+ break;
+ }
+
+ if (get_fn_expr_argtype(fcinfo->flinfo, 0) != elmtype)
+ elog(ERROR, "expected input data type as %u", elmtype);
+
+ /* This checks for overflow of the array dimensions */
+ nitems = ArrayGetNItems(ndims, dimv);
+ ArrayCheckBounds(ndims, dimv, lbsv);
+
+ /* fast track for empty array */
+ if (nitems <= 0)
+ {
+ result = construct_empty_array(elmtype);
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+
+ get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
+
+ /*
+ * For each array element call random(minval, maxval). minval is a type of
+ * elmtype. We also check array element size.
+ */
+ if (!nulls[0] && !nulls[1])
+ {
+ int i;
+ int nbytes = 0;
+ int totbytes = 0;
+ Datum *out_datums;
+ bool *out_nulls;
+ FmgrInfo *random_val_flinfo;
+ FunctionCallInfo random_val_fcinfo;
+
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2,
+ InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].value = values[0];
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].value = values[1];
+ random_val_fcinfo->args[1].isnull = false;
+
+ out_datums = palloc(sizeof(Datum) * nitems);
+ out_nulls = palloc0(sizeof(bool) * nitems);
+
+ for (i = 0; i < nitems; i++)
+ {
+ out_datums[i] = FunctionCallInvoke(random_val_fcinfo);
+ out_nulls[i] = false;
+
+ /* make sure data is not toasted */
+ if (elmlen == -1)
+ out_datums[i] = PointerGetDatum(PG_DETOAST_DATUM(out_datums[i]));
+
+ /*is this correct? */
+ if (i == 0)
+ {
+ nbytes = att_addlength_datum(0, elmlen, out_datums[i]);
+ nbytes = att_align_nominal(nbytes, elmalign);
+ Assert(nbytes > 0);
+
+ totbytes = nbytes * nitems;
+
+ /* check for overflow of multiplication or total request */
+ if (totbytes / nbytes != nitems ||
+ !AllocSizeIsValid(totbytes))
+ ereport(ERROR,
+ errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize));
+ }
+ }
+
+ result = construct_md_array(out_datums,
+ out_nulls,
+ ndims,
+ dimv,
+ lbsv,
+ elmtype,
+ elmlen,
+ elmbyval,
+ elmalign);
+ }
+ else
+ {
+ /*
+ * if array_random specified lower bound or upper bound is null, then
+ * return null we can use create_array_envelope do the job
+ */
+ int nbytes;
+ int dataoffset;
+
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nitems);
+ nbytes = dataoffset;
+
+ result = create_array_envelope(ndims, dimv, lbsv, nbytes,
+ elmtype, dataoffset);
+
+ /* create_array_envelope already zeroed the bitmap, so we're done */
+ }
+
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
static ArrayType *
create_array_envelope(int ndims, int *dimv, int *lbsv, int nbytes,
Oid elmtype, int dataoffset)
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d3d28a263fa..eb2dcdb3db5 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1712,6 +1712,18 @@
proname => 'array_fill', proisstrict => 'f', prorettype => 'anyarray',
proargtypes => 'anyelement _int4 _int4',
prosrc => 'array_fill_with_lower_bounds' },
+{ oid => '6347', descr => 'array constructor with random integer element value',
+ proname => 'array_random', provolatile => 'v', proisstrict => 'f',
+ prorettype => '_int4', proargtypes => 'int4 int4 _int4 _int4',
+ proargnames => '{min,max,dims,lbounds}', prosrc => 'int4array_random' },
+{ oid => '6348', descr => 'array constructor with random bigint element value',
+ proname => 'array_random', provolatile => 'v', proisstrict => 'f',
+ prorettype => '_int8', proargtypes => 'int8 int8 _int4 _int4',
+ proargnames => '{min,max,dims,lbounds}', prosrc => 'int8array_random' },
+{ oid => '6349', descr => 'array constructor with random numeric element value',
+ proname => 'array_random', provolatile => 'v', proisstrict => 'f',
+ prorettype => '_numeric', proargtypes => 'numeric numeric _int4 _int4',
+ proargnames => '{min,max,dims,lbounds}', prosrc => 'numeric_array_random' },
{ oid => '2331', descr => 'expand array to set of rows',
proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support',
proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray',
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index b815473f414..172e04fe1fe 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -3002,3 +3002,88 @@ SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{
{{1},{10}}
(1 row)
+--array_random error case
+SELECT array_random(1, 10, '{134217728}');
+ERROR: array size exceeds the maximum allowed (134217727)
+SELECT array_random(2, 1, '{2}'::int[]);
+ERROR: lower bound must be less than or equal to upper bound
+SELECT array_random(2, 1, '{-1}'::int[]);
+ERROR: array size exceeds the maximum allowed (134217727)
+SELECT array_random(NULL, 1, NULL);
+ERROR: dimension array cannot be null
+SELECT array_random(1, 2, '{NULL}'::int[]);
+ERROR: dimension values cannot be null
+SELECT array_random(1, 2, '{{1},{1}}'::int[]);
+ERROR: wrong number of array subscripts
+DETAIL: Dimension array must be one dimensional.
+SELECT array_random('-infinity', 2.1, '{2,1}'::int[]);
+ERROR: lower bound cannot be infinity
+SELECT array_random('NaN', 2.1, '{2,1}'::int[]);
+ERROR: lower bound cannot be NaN
+SELECT array_random(1::numeric, 'infinity', '{2,1}'::int[]);
+ERROR: upper bound cannot be infinity
+SELECT array_random(1::numeric, 'NaN', '{2,1}'::int[]);
+ERROR: upper bound cannot be NaN
+SELECT array_random(1, 10, '{2}'::int[],'{NULL}');
+ERROR: Low bound array can not contain null values
+SELECT array_random(1, 10, '{2}'::int[],'{1,2}');
+ERROR: wrong number of array subscripts
+DETAIL: Low bound array has different size than dimensions array.
+SELECT array_random(1, 10, '{2}'::int[],'{{1},{2}}');
+ERROR: wrong number of array subscripts
+DETAIL: Low bound array must be one dimensional.
+--array_random ok case, roughly copied from random.sql
+SELECT setseed(0.5);
+ setseed
+---------
+
+(1 row)
+
+SELECT array_random(1, 10::int8, '{2}'::int[]);
+ array_random
+--------------
+ {3,3}
+(1 row)
+
+SELECT array_random(1, 6, array[2,5], array[2,4]);
+ array_random
+--------------------------------------
+ [2:3][4:8]={{6,2,2,5,4},{4,5,6,4,6}}
+(1 row)
+
+SELECT array_random(-2147483648, 2147483647, array[5,2], '{5,2}');
+ array_random
+---------------------------------------------------------------------------------------------------------------------------------------
+ [5:9][2:3]={{358987948,1720494942},{1522989794,-1952561092},{-562516644,-1082313839},{1917063748,1470913398},{-1663900214,383076569}}
+(1 row)
+
+SELECT array_random(-9223372036854775808, 9223372036854775807, '{5,2}', '{5,2}');
+ array_random
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [5:9][2:3]={{8946456079150312946,-5017870099771078560},{2119550555182341436,4426024215363564415},{108422942355035228,8010643357635886140},{7966634679525141473,-7256609373765379790},{1904816578321642612,2378596988253719677}}
+(1 row)
+
+\gdesc
+ Column | Type
+--------------+----------
+ array_random | bigint[]
+(1 row)
+
+SELECT array_random(-1e30, -1e30, '{3,2}');
+ array_random
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {{-1000000000000000000000000000000,-1000000000000000000000000000000},{-1000000000000000000000000000000,-1000000000000000000000000000000},{-1000000000000000000000000000000,-1000000000000000000000000000000}}
+(1 row)
+
+SELECT array_random(-0.45, 0.45, '{2,2,3}', '{5,2,3}');
+ array_random
+--------------------------------------------------------------------------------------------------
+ [5:6][2:3][3:5]={{{-0.31,0.17,-0.16},{-0.25,-0.33,-0.02}},{{0.26,-0.31,-0.16},{0.08,0.23,0.13}}}
+(1 row)
+
+SELECT array_random(0, 1 - 1e-30, '{2, 5}');
+ array_random
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {{0.068990271742366814671825526682,0.865328382850933408408243093209,0.535285120767345535159068774920,0.550766856332365486462263124933,0.327167727807358961934200155904},{0.985749287984797224624803826994,0.475761851397322538460984536852,0.793479002749253814546091362589,0.164860958495312801172978594373,0.519707802923826482493035607188}}
+(1 row)
+
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 47d62c1d38d..8bcc5424db8 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -892,3 +892,29 @@ SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+
+--array_random error case
+SELECT array_random(1, 10, '{134217728}');
+SELECT array_random(2, 1, '{2}'::int[]);
+SELECT array_random(2, 1, '{-1}'::int[]);
+SELECT array_random(NULL, 1, NULL);
+SELECT array_random(1, 2, '{NULL}'::int[]);
+SELECT array_random(1, 2, '{{1},{1}}'::int[]);
+SELECT array_random('-infinity', 2.1, '{2,1}'::int[]);
+SELECT array_random('NaN', 2.1, '{2,1}'::int[]);
+SELECT array_random(1::numeric, 'infinity', '{2,1}'::int[]);
+SELECT array_random(1::numeric, 'NaN', '{2,1}'::int[]);
+SELECT array_random(1, 10, '{2}'::int[],'{NULL}');
+SELECT array_random(1, 10, '{2}'::int[],'{1,2}');
+SELECT array_random(1, 10, '{2}'::int[],'{{1},{2}}');
+
+--array_random ok case, roughly copied from random.sql
+SELECT setseed(0.5);
+SELECT array_random(1, 10::int8, '{2}'::int[]);
+SELECT array_random(1, 6, array[2,5], array[2,4]);
+SELECT array_random(-2147483648, 2147483647, array[5,2], '{5,2}');
+SELECT array_random(-9223372036854775808, 9223372036854775807, '{5,2}', '{5,2}');
+\gdesc
+SELECT array_random(-1e30, -1e30, '{3,2}');
+SELECT array_random(-0.45, 0.45, '{2,2,3}', '{5,2,3}');
+SELECT array_random(0, 1 - 1e-30, '{2, 5}');
--
2.34.1
hi.
I don't want to disrupt this thread too much.
so I created a separate thread ([1]/messages/by-id/CACJufxF8_VzCFRHRt4OHHF74QtB8tj5Z=djsy7Y31OHKG5s1-w@mail.gmail.com) for my attached patch.
[1]: /messages/by-id/CACJufxF8_VzCFRHRt4OHHF74QtB8tj5Z=djsy7Y31OHKG5s1-w@mail.gmail.com