intarray sort returns wrong result

Started by Junwang Zhaoabout 1 year ago3 messages
#1Junwang Zhao
zhjwpku@gmail.com
1 attachment(s)

Hi Hackers,

While working on general array sort[1]/messages/by-id/CAEG8a3KD7ZmQpxNhfPxyc0BjTTTUXiqb56VuMgB7Muu0+yV=qQ@mail.gmail.com, I played with intarray
extension, found a bug (or at least inconsistency) when sorting
multidimensional int array:

create extension intarray;
select sort('{{1,2,3}, {2,3,4}}');

this returns {{1,2,2},{3,3,4}} instead of {{1,2,3},{2,3,4}}

I think this is misleading, if int array is only for one dimension
array, we should
error out when sorting multidimensional int array. Or we can do something like
attached POC patch to make it work with multidimensional int array.

Thoughts?

[1]: /messages/by-id/CAEG8a3KD7ZmQpxNhfPxyc0BjTTTUXiqb56VuMgB7Muu0+yV=qQ@mail.gmail.com

--
Regards
Junwang Zhao

Attachments:

v1-0001-int-array-support-multi-dim.patchapplication/octet-stream; name=v1-0001-int-array-support-multi-dim.patchDownload
From 262ca22b6efd1bd2dc2437bf2c2c83cd71096ccf Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Tue, 12 Nov 2024 00:22:00 +0000
Subject: [PATCH v1] int array support multi dim

Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
 contrib/intarray/_int.h      | 12 ++++++++++++
 contrib/intarray/_int_op.c   | 36 +++++++++++++++++++++++++++++++++---
 contrib/intarray/_int_tool.c | 28 ++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/contrib/intarray/_int.h b/contrib/intarray/_int.h
index 0352cbd368..2b2d6906ae 100644
--- a/contrib/intarray/_int.h
+++ b/contrib/intarray/_int.h
@@ -188,4 +188,16 @@ int			compDESC(const void *a, const void *b);
 				  (direction) ? compASC : compDESC ); \
 	} while(0)
 
+
+int			compArrayASC(const void *a, const void *b, void *arg);
+int			compArrayDESC(const void *a, const void *b, void *arg);
+
+/* sort, either ascending or descending */
+#define QSORT_ARRAY(a, direction, dim0, step) \
+	do { \
+		if (dim0 > 1) \
+			qsort_arg((void*) ARRPTR(a), dim0, sizeof(int32) * step, \
+				  (direction) ? compArrayASC : compArrayDESC, &step); \
+	} while(0)
+
 #endif							/* ___INT_H__ */
diff --git a/contrib/intarray/_int_op.c b/contrib/intarray/_int_op.c
index 5b164f6788..03f0deea84 100644
--- a/contrib/intarray/_int_op.c
+++ b/contrib/intarray/_int_op.c
@@ -199,11 +199,21 @@ sort(PG_FUNCTION_ARGS)
 	int32		dc = (dirstr) ? VARSIZE_ANY_EXHDR(dirstr) : 0;
 	char	   *d = (dirstr) ? VARDATA_ANY(dirstr) : NULL;
 	int			dir = -1;
+	int ndim,
+	nelems,
+	step;
 
 	CHECKARRVALID(a);
 	if (ARRNELEMS(a) < 2)
 		PG_RETURN_POINTER(a);
 
+	ndim = ARR_NDIM(a);
+	if (ndim < 1)
+		PG_RETURN_POINTER(a);
+
+	nelems = ARRNELEMS(a);
+	step = nelems / ARR_DIMS(a)[0];
+
 	if (dirstr == NULL || (dc == 3
 						   && (d[0] == 'A' || d[0] == 'a')
 						   && (d[1] == 'S' || d[1] == 's')
@@ -219,7 +229,7 @@ sort(PG_FUNCTION_ARGS)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("second parameter must be \"ASC\" or \"DESC\"")));
-	QSORT(a, dir);
+	QSORT_ARRAY(a, dir, ARR_DIMS(a)[0], step);
 	PG_RETURN_POINTER(a);
 }
 
@@ -227,9 +237,19 @@ Datum
 sort_asc(PG_FUNCTION_ARGS)
 {
 	ArrayType  *a = PG_GETARG_ARRAYTYPE_P_COPY(0);
+	int ndim,
+	nelems,
+	step;
 
 	CHECKARRVALID(a);
-	QSORT(a, 1);
+
+	ndim = ARR_NDIM(a);
+	if (ndim < 1)
+		PG_RETURN_POINTER(a);
+
+	nelems = ARRNELEMS(a);
+	step = nelems / ARR_DIMS(a)[0];
+	QSORT_ARRAY(a, 1, ARR_DIMS(a)[0], step);
 	PG_RETURN_POINTER(a);
 }
 
@@ -237,9 +257,19 @@ Datum
 sort_desc(PG_FUNCTION_ARGS)
 {
 	ArrayType  *a = PG_GETARG_ARRAYTYPE_P_COPY(0);
+	int ndim,
+	nelems,
+	step;
 
 	CHECKARRVALID(a);
-	QSORT(a, 0);
+
+	ndim = ARR_NDIM(a);
+	if (ndim < 1)
+		PG_RETURN_POINTER(a);
+
+	nelems = ARRNELEMS(a);
+	step = nelems / ARR_DIMS(a)[0];
+	QSORT_ARRAY(a, 0, ARR_DIMS(a)[0], step);
 	PG_RETURN_POINTER(a);
 }
 
diff --git a/contrib/intarray/_int_tool.c b/contrib/intarray/_int_tool.c
index c85280c842..cae032d051 100644
--- a/contrib/intarray/_int_tool.c
+++ b/contrib/intarray/_int_tool.c
@@ -405,3 +405,31 @@ compDESC(const void *a, const void *b)
 {
 	return pg_cmp_s32(*(const int32 *) b, *(const int32 *) a);
 }
+
+int
+compArrayASC(const void *a, const void *b, void *arg)
+{
+	int cnt = *(int*)arg;
+	for (int i = 0; i < cnt; i++) {
+		int cmp = pg_cmp_s32(((const int32 *) a)[i], ((const int32 *) b)[i]);
+		if (cmp == 0) {
+			continue;
+		}
+		return cmp;
+	}
+	return 0;
+}
+
+int
+compArrayDESC(const void *a, const void *b, void *arg)
+{
+	int cnt = *(int*)arg;
+	for (int i = 0; i < cnt; i++) {
+		int cmp = pg_cmp_s32(((const int32 *) b)[i], ((const int32 *) a)[i]);
+		if (cmp == 0) {
+			continue;
+		}
+		return cmp;
+	}
+	return 0;
+}
-- 
2.39.5

#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Junwang Zhao (#1)
Re: intarray sort returns wrong result

Junwang Zhao <zhjwpku@gmail.com> writes:

While working on general array sort[1], I played with intarray
extension, found a bug (or at least inconsistency) when sorting
multidimensional int array:

create extension intarray;
select sort('{{1,2,3}, {2,3,4}}');

this returns {{1,2,2},{3,3,4}} instead of {{1,2,3},{2,3,4}}

This is documented, isn't it?

Many of these operations are only sensible for one-dimensional
arrays. Although they will accept input arrays of more dimensions,
the data is treated as though it were a linear array in storage
order.

I don't think anyone will thank us for changing intarray's behavior
many years after the fact.

regards, tom lane

#3Junwang Zhao
zhjwpku@gmail.com
In reply to: Tom Lane (#2)
Re: intarray sort returns wrong result

On Tue, Nov 12, 2024 at 9:13 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

Junwang Zhao <zhjwpku@gmail.com> writes:

While working on general array sort[1], I played with intarray
extension, found a bug (or at least inconsistency) when sorting
multidimensional int array:

create extension intarray;
select sort('{{1,2,3}, {2,3,4}}');

this returns {{1,2,2},{3,3,4}} instead of {{1,2,3},{2,3,4}}

This is documented, isn't it?

Many of these operations are only sensible for one-dimensional
arrays. Although they will accept input arrays of more dimensions,
the data is treated as though it were a linear array in storage
order.

I did not notice this statement, my bad 😞

I don't think anyone will thank us for changing intarray's behavior
many years after the fact.

Agreed. Sorry for the noise.

regards, tom lane

--
Regards
Junwang Zhao