From 35842f788f43712f33533a65154aafc700ff6d6f Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <peter.geoghegan86@gmail.com>
Date: Wed, 29 Jul 2015 15:38:12 -0700
Subject: [PATCH 2/2] Add cursory regression tests for sorting

This is not intended to be a formal patch submission.  Tests are added
that happened to be useful during the development of the "quicksort with
spillover" patch, as the regression tests currently have precisely zero
coverage for any variety of external sort operation.  The tests are
provided as a convenience to reviewers of that patch only.

In the long run, there should be comprehensive smoke-testing of these
cases (probably not in the standard regression tests), but this patch
does not pretend to be any kind of basis for that.

The tests added have a number of obvious problems:

* They take far too long to run to belong in the standard regression
test suite, and yet they're run as part of that suite.  With a little
effort, they could probably be made to run more quickly with no
appreciable loss of coverage, but that hasn't been done.

* They're far from comprehensive.

* The tests require about 1.5GB of disk space to run.

* They're not portable.  They might even be extremely non-portable due
to implementation differences across platform pseudo-random number
generators.

The important point is that each query tested gives consistent results.
There is no reason to think that varying the work_mem settings will
change which basic sorting approach each query takes relative to my
original testing (at least assuming a 64-bit platform), which is also
important.
---
 src/test/regress/expected/sorting.out | 210 ++++++++++++++++++++++++++++++++++
 src/test/regress/parallel_schedule    |   1 +
 src/test/regress/serial_schedule      |   1 +
 src/test/regress/sql/sorting.sql      |  82 +++++++++++++
 4 files changed, 294 insertions(+)
 create mode 100644 src/test/regress/expected/sorting.out
 create mode 100644 src/test/regress/sql/sorting.sql

diff --git a/src/test/regress/expected/sorting.out b/src/test/regress/expected/sorting.out
new file mode 100644
index 0000000..3aa30f4
--- /dev/null
+++ b/src/test/regress/expected/sorting.out
@@ -0,0 +1,210 @@
+--
+-- sorting tests
+--
+--
+-- int4 test (10 million tuples, medium cardinality)
+--
+select setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+create unlogged table int4_sort_tbl as
+  select (random() * 1000000)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+--
+-- int4 test (10 million tuples, high cardinality)
+--
+create unlogged table highcard_int4_sort_tbl as
+  select (random() * 100000000)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+--
+-- int4 test (10 million tuples, low cardinality)
+--
+create unlogged table lowcard_int4_sort_tbl as
+  select (random() * 100)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+-- Results should be consistent:
+set work_mem = '64MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+set work_mem = '100MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+set work_mem = '110MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+set work_mem = '128MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+set work_mem = '140MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+set work_mem = '150MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+-- should be in-memory:
+set work_mem = '512MB';
+select count(distinct(s)) from int4_sort_tbl;
+ count  
+--------
+ 999949
+(1 row)
+
+select count(distinct(s)) from highcard_int4_sort_tbl;
+  count  
+---------
+ 9515397
+(1 row)
+
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+ count 
+-------
+   101
+(1 row)
+
+--
+-- text test (uses abbreviated keys, 10 million tuples)
+--
+select setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+create unlogged table text_sort_tbl as
+  select (random() * 100000)::int4::text s
+  from generate_series(1, 10000000);
+-- Start with sort that results in 3-way final merge:
+set work_mem = '190MB';
+select count(distinct(s)) from text_sort_tbl;
+ count  
+--------
+ 100001
+(1 row)
+
+-- Uses optimization where it's marginal:
+set work_mem = '200MB';
+select count(distinct(s)) from text_sort_tbl;
+ count  
+--------
+ 100001
+(1 row)
+
+-- Uses optimization where it's favorable:
+set work_mem = '450MB';
+select count(distinct(s)) from text_sort_tbl;
+ count  
+--------
+ 100001
+(1 row)
+
+-- internal sort:
+set work_mem = '500MB';
+select count(distinct(s)) from text_sort_tbl;
+ count  
+--------
+ 100001
+(1 row)
+
+drop table int4_sort_tbl;
+drop table highcard_int4_sort_tbl;
+drop table lowcard_int4_sort_tbl;
+drop table text_sort_tbl;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df15de..7ff656a 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -37,6 +37,7 @@ test: geometry horology regex oidjoins type_sanity opr_sanity
 # ----------
 test: insert
 test: insert_conflict
+test: sorting
 test: create_function_1
 test: create_type
 test: create_table
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 15d74d4..ebe7de0 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -51,6 +51,7 @@ test: type_sanity
 test: opr_sanity
 test: insert
 test: insert_conflict
+test: sorting
 test: create_function_1
 test: create_type
 test: create_table
diff --git a/src/test/regress/sql/sorting.sql b/src/test/regress/sql/sorting.sql
new file mode 100644
index 0000000..75453fc
--- /dev/null
+++ b/src/test/regress/sql/sorting.sql
@@ -0,0 +1,82 @@
+--
+-- sorting tests
+--
+
+--
+-- int4 test (10 million tuples, medium cardinality)
+--
+select setseed(1);
+create unlogged table int4_sort_tbl as
+  select (random() * 1000000)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+
+--
+-- int4 test (10 million tuples, high cardinality)
+--
+create unlogged table highcard_int4_sort_tbl as
+  select (random() * 100000000)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+
+--
+-- int4 test (10 million tuples, low cardinality)
+--
+create unlogged table lowcard_int4_sort_tbl as
+  select (random() * 100)::int4 s, 'abcdefghijlmn'::text junk
+  from generate_series(1, 10000000);
+
+-- Results should be consistent:
+set work_mem = '64MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+set work_mem = '100MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+set work_mem = '110MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+set work_mem = '128MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+set work_mem = '140MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+set work_mem = '150MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+-- should be in-memory:
+set work_mem = '512MB';
+select count(distinct(s)) from int4_sort_tbl;
+select count(distinct(s)) from highcard_int4_sort_tbl;
+select count(distinct(s)) from lowcard_int4_sort_tbl;
+
+--
+-- text test (uses abbreviated keys, 10 million tuples)
+--
+select setseed(1);
+create unlogged table text_sort_tbl as
+  select (random() * 100000)::int4::text s
+  from generate_series(1, 10000000);
+
+-- Start with sort that results in 3-way final merge:
+set work_mem = '190MB';
+select count(distinct(s)) from text_sort_tbl;
+-- Uses optimization where it's marginal:
+set work_mem = '200MB';
+select count(distinct(s)) from text_sort_tbl;
+-- Uses optimization where it's favorable:
+set work_mem = '450MB';
+select count(distinct(s)) from text_sort_tbl;
+-- internal sort:
+set work_mem = '500MB';
+select count(distinct(s)) from text_sort_tbl;
+
+drop table int4_sort_tbl;
+drop table highcard_int4_sort_tbl;
+drop table lowcard_int4_sort_tbl;
+drop table text_sort_tbl;
-- 
1.9.1

