From 2c6c28755416556597188c323604ec975faffa25 Mon Sep 17 00:00:00 2001
From: Martin Kalcher <martin.kalcher@aboutsource.net>
Date: Sun, 17 Jul 2022 01:53:05 +0200
Subject: [PATCH] Introduce shuffle() and sample() to intarray extension

* shuffle(arr) returns an array with the elements of the provided array
  in random order
* sample(arr, n) returns an array with n randomly-chosen elements from
  the provided array
---
 contrib/intarray/Makefile               |  4 +-
 contrib/intarray/_int.h                 |  2 +
 contrib/intarray/_int_op.c              | 35 +++++++++++++++
 contrib/intarray/_int_tool.c            | 57 +++++++++++++++++++++++++
 contrib/intarray/expected/_int.out      | 36 ++++++++++++++++
 contrib/intarray/intarray--1.5--1.6.sql | 16 +++++++
 contrib/intarray/intarray.control       |  2 +-
 contrib/intarray/sql/_int.sql           |  6 +++
 8 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 contrib/intarray/intarray--1.5--1.6.sql

diff --git a/contrib/intarray/Makefile b/contrib/intarray/Makefile
index 3817c1669a..1a79c359ed 100644
--- a/contrib/intarray/Makefile
+++ b/contrib/intarray/Makefile
@@ -12,8 +12,8 @@ OBJS = \
 	_intbig_gist.o
 
 EXTENSION = intarray
-DATA = intarray--1.4--1.5.sql intarray--1.3--1.4.sql intarray--1.2--1.3.sql \
-	intarray--1.2.sql intarray--1.1--1.2.sql \
+DATA = intarray--1.5--1.6.sql intarray--1.4--1.5.sql intarray--1.3--1.4.sql \
+	intarray--1.2--1.3.sql intarray--1.2.sql intarray--1.1--1.2.sql \
 	intarray--1.0--1.1.sql
 PGFILEDESC = "intarray - functions and operators for arrays of integers"
 
diff --git a/contrib/intarray/_int.h b/contrib/intarray/_int.h
index 304c29525c..eaa7a09ae8 100644
--- a/contrib/intarray/_int.h
+++ b/contrib/intarray/_int.h
@@ -123,6 +123,8 @@ bool		inner_int_overlap(ArrayType *a, ArrayType *b);
 bool		inner_int_contains(ArrayType *a, ArrayType *b);
 ArrayType  *inner_int_union(ArrayType *a, ArrayType *b);
 ArrayType  *inner_int_inter(ArrayType *a, ArrayType *b);
+void		inner_int_shuffle(ArrayType *a);
+ArrayType  *inner_int_sample(ArrayType *a, int n);
 void		rt__int_size(ArrayType *a, float *size);
 void		gensign(BITVECP sign, int *a, int len, int siglen);
 
diff --git a/contrib/intarray/_int_op.c b/contrib/intarray/_int_op.c
index 5b164f6788..8c4edf6ddf 100644
--- a/contrib/intarray/_int_op.c
+++ b/contrib/intarray/_int_op.c
@@ -167,6 +167,8 @@ PG_FUNCTION_INFO_V1(sort);
 PG_FUNCTION_INFO_V1(sort_asc);
 PG_FUNCTION_INFO_V1(sort_desc);
 PG_FUNCTION_INFO_V1(uniq);
+PG_FUNCTION_INFO_V1(shuffle);
+PG_FUNCTION_INFO_V1(sample);
 PG_FUNCTION_INFO_V1(idx);
 PG_FUNCTION_INFO_V1(subarray);
 PG_FUNCTION_INFO_V1(intarray_push_elem);
@@ -255,6 +257,39 @@ uniq(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(a);
 }
 
+Datum
+shuffle(PG_FUNCTION_ARGS)
+{
+	ArrayType  *a = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+	CHECKARRVALID(a);
+	inner_int_shuffle(a);
+	PG_RETURN_POINTER(a);
+}
+
+Datum
+sample(PG_FUNCTION_ARGS)
+{
+	ArrayType  *a = PG_GETARG_ARRAYTYPE_P_COPY(0);
+	ArrayType  *r;
+
+	int	n = PG_GETARG_INT32(1);
+
+	CHECKARRVALID(a);
+
+	if (n >= ARRNELEMS(a))
+		PG_RETURN_POINTER(a);
+
+	if (n < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("second parameter must not be negative")));
+
+	r = inner_int_sample(a, n);
+	PG_FREE_IF_COPY(a, 0);
+	PG_RETURN_POINTER(r);
+}
+
 Datum
 idx(PG_FUNCTION_ARGS)
 {
diff --git a/contrib/intarray/_int_tool.c b/contrib/intarray/_int_tool.c
index 8ed4d63fc3..b26c910deb 100644
--- a/contrib/intarray/_int_tool.c
+++ b/contrib/intarray/_int_tool.c
@@ -179,6 +179,63 @@ inner_int_inter(ArrayType *a, ArrayType *b)
 		return resize_intArrayType(r, k);
 }
 
+void
+inner_int_shuffle(ArrayType *a)
+{
+	int *d = ARRPTR(a);
+	int n = ARRNELEMS(a);
+	int i, tmp;
+
+	/*
+	 * Shuffling is done using the Fisher-Yates shuffle algorithm:
+	 * Iterate through the array and swap the current head with a
+	 * randomly-chosen element from behind the current head (possibly
+	 * the head itself).
+	 */
+	while (n > 1)
+	{
+		i = random() % n;
+		tmp = d[i];
+		d[i] = *d;
+		*d = tmp;
+		d++;
+		n--;
+	}
+}
+
+ArrayType *
+inner_int_sample(ArrayType *a, int n)
+{
+	ArrayType *r;
+	int *da,
+		*dr;
+	int na = ARRNELEMS(a);
+
+	n = Min(na, n);
+	r = new_intArrayType(n);
+	da = ARRPTR(a),
+	dr = ARRPTR(r);
+
+	/*
+	 * Choosing samples is done using a variant of the "inside-out"
+	 * Fisher-Yates shuffle algorithm:
+	 * Iterate through the source array, move a randomly-chosen
+	 * element from behind the current head (or the head itself) to
+	 * the destination array and replace it with the current head.
+	 */
+	while (n > 0)
+	{
+		int i = random() % na;
+		*dr = da[i];
+		da[i] = *da;
+		dr++;
+		n--;
+		da++;
+		na--;
+	}
+	return r;
+}
+
 void
 rt__int_size(ArrayType *a, float *size)
 {
diff --git a/contrib/intarray/expected/_int.out b/contrib/intarray/expected/_int.out
index a09d40efa1..cffa683ffe 100644
--- a/contrib/intarray/expected/_int.out
+++ b/contrib/intarray/expected/_int.out
@@ -85,6 +85,42 @@ SELECT subarray('{1234234,-30,-30,234234,-30}',0,-1);
  {1234234,-30,-30,234234}
 (1 row)
 
+SELECT sort(shuffle('{1234234,-30,-30,234234,-30}'));
+             sort             
+------------------------------
+ {-30,-30,-30,234234,1234234}
+(1 row)
+
+SELECT shuffle('{1,2,3,4,5,6,7,8,9,10,11,12}') != shuffle('{1,2,3,4,5,6,7,8,9,10,11,12}');
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT icount(sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6));
+ icount 
+--------
+      6
+(1 row)
+
+SELECT icount(uniq(sort(sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6))));
+ icount 
+--------
+      6
+(1 row)
+
+SELECT sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6) <@ '{1,2,3,4,5,6,7,8,9,10,11,12}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6) != sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6);
+ ?column? 
+----------
+ t
+(1 row)
+
 SELECT #'{1234234,234234}'::int[];
  ?column? 
 ----------
diff --git a/contrib/intarray/intarray--1.5--1.6.sql b/contrib/intarray/intarray--1.5--1.6.sql
new file mode 100644
index 0000000000..5d5ef8ee01
--- /dev/null
+++ b/contrib/intarray/intarray--1.5--1.6.sql
@@ -0,0 +1,16 @@
+/* contrib/intarray/intarray--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION intarray UPDATE TO '1.6'" to load this file. \quit
+
+-- Add shuffle functions
+
+CREATE FUNCTION shuffle(_int4)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION sample(_int4, int4)
+RETURNS _int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/contrib/intarray/intarray.control b/contrib/intarray/intarray.control
index c3ff753e2c..6ae8881f15 100644
--- a/contrib/intarray/intarray.control
+++ b/contrib/intarray/intarray.control
@@ -1,6 +1,6 @@
 # intarray extension
 comment = 'functions, operators, and index support for 1-D arrays of integers'
-default_version = '1.5'
+default_version = '1.6'
 module_pathname = '$libdir/_int'
 relocatable = true
 trusted = true
diff --git a/contrib/intarray/sql/_int.sql b/contrib/intarray/sql/_int.sql
index b26fc57e4d..3eb15d5c08 100644
--- a/contrib/intarray/sql/_int.sql
+++ b/contrib/intarray/sql/_int.sql
@@ -18,6 +18,12 @@ SELECT idx('{1234234,-30,-30,234234,-30}',-30);
 SELECT subarray('{1234234,-30,-30,234234,-30}',2,3);
 SELECT subarray('{1234234,-30,-30,234234,-30}',-1,1);
 SELECT subarray('{1234234,-30,-30,234234,-30}',0,-1);
+SELECT sort(shuffle('{1234234,-30,-30,234234,-30}'));
+SELECT shuffle('{1,2,3,4,5,6,7,8,9,10,11,12}') != shuffle('{1,2,3,4,5,6,7,8,9,10,11,12}');
+SELECT icount(sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6));
+SELECT icount(uniq(sort(sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6))));
+SELECT sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6) <@ '{1,2,3,4,5,6,7,8,9,10,11,12}';
+SELECT sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6) != sample('{1,2,3,4,5,6,7,8,9,10,11,12}', 6);
 
 SELECT #'{1234234,234234}'::int[];
 SELECT '{123,623,445}'::int[] + 1245;
-- 
2.37.1

