commit f5e73a04684d0b70f9ce3a904db2b96a21e19f58
Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date:   Thu Oct 22 14:34:51 2020 +0300

    Sort benchmark stuff

diff --git a/src/test/sort/Makefile b/src/test/sort/Makefile
new file mode 100644
index 00000000000..4ec150a69c7
--- /dev/null
+++ b/src/test/sort/Makefile
@@ -0,0 +1,32 @@
+CFLAGS=-g -I`pg_config --includedir`
+
+LDFLAGS=-L`pg_config --libdir` -lpq -lm
+
+TESTDB=sorttest
+
+# For testing quicksort.
+SCALE_SMALL=1024	# 1 MB
+
+# For testing external sort, while the dataset still fits in OS cache.
+SCALE_MEDIUM=1048576	# 1 GB
+#SCALE_MEDIUM=102400	# 100 MB
+
+# Does not fit in memory.
+SCALE_LARGE=20971520	# 20 GB
+#SCALE_LARGE=1500000	# 20 GB
+
+all: generate speed correctness
+
+generate: generate.c
+
+speed: speed.c
+
+correctness: correctness.c
+
+generate_testdata:
+	dropdb --if-exists $(TESTDB)
+	createdb $(TESTDB)
+	psql $(TESTDB) -c "CREATE SCHEMA small; CREATE SCHEMA medium; CREATE SCHEMA large;"
+	(echo "set search_path=medium;"; ./generate all $(SCALE_MEDIUM)) | psql $(TESTDB)
+	(echo "set search_path=small;"; ./generate all $(SCALE_SMALL)) | psql $(TESTDB)
+	(echo "set search_path=large;"; ./generate all $(SCALE_LARGE)) | psql $(TESTDB)
diff --git a/src/test/sort/correctness.c b/src/test/sort/correctness.c
new file mode 100644
index 00000000000..b41aa2e3a42
--- /dev/null
+++ b/src/test/sort/correctness.c
@@ -0,0 +1,153 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#include <libpq-fe.h>
+
+static PGconn *conn;
+
+static void
+execute(const char *sql)
+{
+	int			i;
+	PGresult   *res;
+
+	fprintf(stderr, "%s\n", sql);
+	
+	res = PQexec(conn, sql);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+
+	PQclear(res);
+}
+
+static void
+check_sorted(const char *sql, int (*cmp)(const char *a, const char *b))
+{
+	int			i;
+	PGresult   *res;
+	PGresult   *prevres = NULL;
+	int			rowno;
+
+	fprintf(stderr, "running query: %s\n", sql);
+	if (!PQsendQuery(conn, sql))
+	{
+		fprintf(stderr,"query failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+	if (!PQsetSingleRowMode(conn))
+	{
+		fprintf(stderr,"setting single-row mode failed: %s", PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+
+	rowno = 1;
+	while (res = PQgetResult(conn))
+	{
+		if (PQresultStatus(res) == PGRES_TUPLES_OK)
+			continue;
+		if (PQresultStatus(res) != PGRES_SINGLE_TUPLE)
+		{
+			fprintf(stderr,"error while fetching: %d, %s\n%s", PQresultStatus(res), sql, PQerrorMessage(conn));
+			PQclear(res);
+			exit(1);
+		}
+
+		if (prevres)
+		{
+			if (!cmp(PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0)))
+			{
+				fprintf(stderr,"FAIL: result not sorted, row %d: %s, prev %s\n", rowno,
+						PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0));
+				PQclear(res);
+				exit(1);
+			}
+			PQclear(prevres);
+		}
+		prevres = res;
+
+		rowno++;
+	}
+
+	if (prevres)
+		PQclear(prevres);
+}
+
+
+static int
+compare_strings(const char *a, const char *b)
+{
+	return strcmp(a, b) <= 0;
+}
+
+static int
+compare_ints(const char *a, const char *b)
+{
+	return atoi(a) <= atoi(b);
+}
+
+int
+main(int argc, char **argv)
+{
+	double duration;
+	char		buf[1000];
+
+	/* Make a connection to the database */
+	conn = PQconnectdb("");
+
+	/* Check to see that the backend connection was successfully made */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+	execute("set trace_sort=on");
+
+	execute("set work_mem = '4MB'");
+
+	check_sorted("SELECT * FROM small.ordered_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM small.random_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM small.ordered_text ORDER BY t", compare_strings);
+	check_sorted("SELECT * FROM small.random_text ORDER BY t", compare_strings);
+
+	execute("set work_mem = '16MB'");
+
+	check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings);
+	check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings);
+
+	execute("set work_mem = '256MB'");
+
+	check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings);
+	check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings);
+
+	execute("set work_mem = '512MB'");
+
+	check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings);
+	check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings);
+
+	execute("set work_mem = '2048MB'");
+
+	check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints);
+	check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings);
+	check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings);
+
+	PQfinish(conn);
+
+	return 0;
+}
diff --git a/src/test/sort/generate.c b/src/test/sort/generate.c
new file mode 100644
index 00000000000..9b005f97ed6
--- /dev/null
+++ b/src/test/sort/generate.c
@@ -0,0 +1,198 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void
+generate_ordered_integers(int scale)
+{
+	int			rows = ((double) scale) * 28.75;
+	int			i;
+
+	printf("DROP TABLE IF EXISTS ordered_ints;\n");
+	printf("BEGIN;");
+	printf("CREATE TABLE ordered_ints (i int4);\n");
+	printf("COPY ordered_ints FROM STDIN WITH (FREEZE);\n");
+
+	for (i = 0; i < rows; i++)
+		printf("%d\n", i);
+
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+static void
+generate_random_integers(int scale)
+{
+	int			rows = ((double) scale) * 28.75;
+	int			i;
+
+	printf("DROP TABLE IF EXISTS random_ints;\n");
+	printf("BEGIN;");
+	printf("CREATE TABLE random_ints (i int4);\n");
+	printf("COPY random_ints FROM STDIN WITH (FREEZE);\n");
+
+	for (i = 0; i < rows; i++)
+		printf("%d\n", random());
+
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+#define ALPHABET_SIZE 26
+static const char alphabet[ALPHABET_SIZE + 1] = "abcdefghijklmnopqrstuvwxyz";
+
+#define TEXT_LEN 50
+
+static void
+random_string(char *buf, int len)
+{
+	int			i;
+	long		r;
+	long		m;
+
+	m = 0;
+	for (i = 0; i < len; i++)
+	{
+		if (m / ALPHABET_SIZE < ALPHABET_SIZE)
+		{
+			m = RAND_MAX;
+			r = random();
+		}
+
+		*buf = alphabet[r % ALPHABET_SIZE];
+		m = m / ALPHABET_SIZE;
+		r = r / ALPHABET_SIZE;
+		buf++;
+	}
+	*buf = '\0';
+	return;
+}
+
+static void
+generate_random_text(int scale)
+{
+	int			rows = ((double) scale) * 12.7;
+	int			i;
+	char		buf[TEXT_LEN + 1] = { 0 };
+
+	printf("DROP TABLE IF EXISTS random_text;\n");
+	printf("BEGIN;");
+	printf("CREATE TABLE random_text (t text);\n");
+	printf("COPY random_text FROM STDIN WITH (FREEZE);\n");
+
+	for (i = 0; i < rows; i++)
+	{
+		random_string(buf, TEXT_LEN);
+		printf("%s\n", buf);
+	}
+
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+static void
+generate_ordered_text(int scale)
+{
+	int			rows = ((double) scale) * 12.7;
+	int			i;
+	int			j;
+	char		indexes[TEXT_LEN] = {0};
+	char		buf[TEXT_LEN + 1];
+	double			digits;
+
+	printf("DROP TABLE IF EXISTS ordered_text;\n");
+	printf("BEGIN;");
+	printf("CREATE TABLE ordered_text (t text);\n");
+	printf("COPY ordered_text FROM STDIN WITH (FREEZE);\n");
+
+	/*
+	 * We don't want all the strings to have the same prefix.
+	 * That makes the comparisons very expensive. That might be an
+	 * interesting test case too, but not what we want here. To avoid
+	 * that, figure out how many characters will change, with the #
+	 * of rows we chose.
+	 */
+	digits = ceil(log(rows) / log((double) ALPHABET_SIZE));
+
+	if (digits > TEXT_LEN)
+		digits = TEXT_LEN;
+
+	for (i = 0; i < rows; i++)
+	{
+		for (j = 0; j < TEXT_LEN; j++)
+		{
+			buf[j] = alphabet[indexes[j]];
+		}
+		buf[j] = '\0';
+		printf("%s\n", buf);
+
+		/* increment last character, carrying if needed */
+		for (j = digits - 1; j >= 0; j--)
+		{
+			indexes[j]++;
+			if (indexes[j] == ALPHABET_SIZE)
+				indexes[j] = 0;
+			else
+				break;
+		}
+	}
+
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+
+struct
+{
+	char *name;
+	void (*generate_func)(int scale);
+} datasets[] =
+{
+ 	{ "ordered_integers", generate_ordered_integers },
+	{ "random_integers", generate_random_integers },
+	{ "ordered_text", generate_ordered_text },
+	{ "random_text", generate_random_text },
+	{ NULL, NULL }
+};
+
+void
+usage()
+{
+	printf("Usage: generate <dataset name> [scale] [schema]\n");
+	exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+	int			scale;
+	int			i;
+	int			found = 0;
+
+	if (argc < 2)
+		usage();
+
+	if (argc >= 3)
+		scale = atoi(argv[2]);
+	else
+		scale = 1024; /* 1 MB */
+
+	for (i = 0; datasets[i].name != NULL; i++)
+	{
+		if (strcmp(argv[1], datasets[i].name) == 0 ||
+			strcmp(argv[1], "all") == 0)
+		{
+			fprintf (stderr, "Generating %s for %d kB...\n", datasets[i].name, scale);
+			datasets[i].generate_func(scale);
+			found = 1;
+		}
+	}
+
+	if (!found)
+	{
+		fprintf(stderr, "unrecognized test name %s\n", argv[1]);
+		exit(1);
+	}
+	exit(0);
+}
diff --git a/src/test/sort/plot.sh b/src/test/sort/plot.sh
new file mode 100644
index 00000000000..942a4af125d
--- /dev/null
+++ b/src/test/sort/plot.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+set -e
+
+PATH=~/pgsql.fsmfork/bin:$PATH
+
+plot_test() {
+  local testname=$1;
+    
+  psql sorttest <<EOF
+\copy (select work_mem, avg(time_ms), min(time_ms), max(time_ms) from public.results where testname='$testname' group by work_mem order by 1) to 'data/results-patched.dat'
+\copy (select work_mem, avg(time_ms), min(time_ms), max(time_ms) from public.results_unpatched where testname='$testname' group by work_mem order by 1) to 'data/results-unpatched.dat'
+EOF
+
+  gnuplot - <<EOF
+set terminal png
+set output '$1.png'
+set title "$testname (1 GB)" noenhanced
+set xlabel "work_mem (kB)" noenhanced
+set ylabel "time (ms)"
+set logscale x
+set yrange [32:]
+set yrange [0:]
+plot "data/results-patched.dat" using 1:2 with lines title "balanced k-way merge", \
+     "data/results-unpatched.dat" using 1:2 with lines title "unpatched (commit e7c2b95d37)"
+pause mouse close
+EOF
+
+
+}
+
+
+plot_test medium.ordered_ints
+plot_test medium.random_ints
+plot_test medium.ordered_text
+plot_test medium.random_text
diff --git a/src/test/sort/runtest.sh b/src/test/sort/runtest.sh
new file mode 100644
index 00000000000..b952b986d23
--- /dev/null
+++ b/src/test/sort/runtest.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+PGDATA=/dev/shm/sorttest-pgdata
+
+PREFIX=/home/hlinnakangas/pgsql-install
+
+EMAIL=hlinnakangas@pivotal.io
+
+set -e
+
+function compile
+{
+    git checkout -f $1
+    git checkout -f sort-balanced-merge2 src/test/sort
+    ./configure --enable-debug --enable-depend --prefix=$PREFIX CC="ccache gcc"
+    make -j4 -s clean
+    make -j4 -s install
+
+    PATH=$PREFIX/bin:$PATH make -C src/test/sort speed generate # build the test suite
+}
+
+function runtest
+{
+    # Kill everything
+    pkill -9 postgres || true
+    rm -rf $PGDATA
+
+    $PREFIX/bin/initdb -D $PGDATA
+    echo "max_parallel_workers_per_gather = 0" >> $PGDATA/postgresql.conf
+    echo "shared_buffers = '1 GB' " >> $PGDATA/postgresql.conf
+
+    $PREFIX/bin/pg_ctl -D $PGDATA start -w -l serverlog-$1
+
+    LD_LIBRARY_PATH=$PREFIX/lib PATH=$PREFIX/bin:$PATH make -C src/test/sort generate_testdata
+
+    git rev-parse --short HEAD > results-$1.txt
+    
+    LD_LIBRARY_PATH=$PREFIX/lib PGDATABASE=sorttest ./src/test/sort/speed >> results-$1.txt
+    mail -s "pgperftest-vm: test $1 finished!" $EMAIL < results-$1.txt
+}
+
+compile master
+runtest master
+
+compile sort-balanced-merge2
+runtest sort-balanced-merge2
diff --git a/src/test/sort/speed.c b/src/test/sort/speed.c
new file mode 100644
index 00000000000..72a9e9b117e
--- /dev/null
+++ b/src/test/sort/speed.c
@@ -0,0 +1,177 @@
+#define FRONTEND 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#define pg_attribute_printf(f,a)
+
+#include <libpq-fe.h>
+#include <internal/pqexpbuffer.h>
+
+#define REPETITIONS 1
+
+static PGconn *conn;
+
+/* returns duration in ms */
+static double
+execute(const char *sql)
+{
+	struct timeval before, after;
+	PGresult   *res;
+
+	gettimeofday(&before, NULL);
+	res = PQexec(conn, sql);
+	gettimeofday(&after, NULL);
+	if (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+	PQclear(res);
+
+	return (((double) (after.tv_sec - before.tv_sec)) * 1000.0 + ((double) (after.tv_usec - before.tv_usec) / 1000.0));
+}
+
+static void
+execute_test(const char *testname, const char *query)
+{
+	double		duration;
+	char		buf[1000];
+	int			i;
+
+	for (i = 0; i < REPETITIONS; i++)
+	{
+		printf ("%s: ", testname);
+		fflush(stdout);
+		duration = execute(query);
+
+		//if (i > 0)
+		//	printf(", ");
+		printf("%.0f ms", duration);
+		printf("\n");
+		fflush(stdout);
+
+		sprintf(buf,
+				"insert into public.results (testname, work_mem, time_ms) "
+				"values ('%s', (select setting from pg_settings where name='work_mem')::int, '%g')",
+				testname, duration);
+		execute(buf);
+	}
+}
+
+static void
+execute_test_series(char *tblname)
+{
+	//static const char *work_mems[] = { "1MB", "4MB", "8MB", "16MB", "32MB", "64MB", "128MB", "256MB", "512MB", NULL };
+	static const char *work_mems[] = {
+		"64 kB",
+		"80 kB",
+
+		"100 kB",
+		"150 kB",
+		"250 kB",
+		"400 kB",
+		"650 kB",
+
+		"1000 kB",
+		"1500 kB",
+		"2500 kB",
+		"4000 kB",
+		"6500 kB",
+
+		"10000 kB",
+		"15000 kB",
+		"25000 kB",
+		"40000 kB",
+		"65000 kB",
+
+		"100 MB",
+		"150 MB",
+		"250 MB",
+		"400 MB",
+		"650 MB",
+		"800 MB",
+
+		"1000 MB",
+		"1500 MB",
+		"2500 MB",
+		
+		NULL, NULL, NULL, NULL };
+	int			i;
+	int			j;
+	PQExpBufferData sqlbuf;
+
+	initPQExpBuffer(&sqlbuf);
+
+	printf("# Tests with %s, different work_mems, no parallelism\n", tblname);
+	printf("-----\n");
+
+	printfPQExpBuffer(&sqlbuf, "set max_parallel_workers_per_gather=0");
+	execute(sqlbuf.data);
+	
+	for (i = 0; work_mems[i] != NULL; i++)
+	{
+		const char *work_mem = work_mems[i];
+		char testname[100];
+
+ 		printfPQExpBuffer(&sqlbuf, "set work_mem='%s'", work_mem);
+		execute(sqlbuf.data);
+		snprintf(testname, sizeof(testname), "%s", tblname, work_mem);
+
+		resetPQExpBuffer(&sqlbuf);
+
+		appendPQExpBuffer(&sqlbuf, "SELECT COUNT(*) FROM ((");
+		appendPQExpBuffer(&sqlbuf, "SELECT * FROM %s", tblname);
+		appendPQExpBuffer(&sqlbuf, ") ORDER BY 1) t");
+
+		execute_test(testname,  sqlbuf.data);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	double duration;
+	char		buf[1000];
+
+	/* Make a connection to the database */
+	conn = PQconnectdb("");
+
+	/* Check to see that the backend connection was successfully made */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+
+	execute("create table if not exists public.results (testname text, work_mem int, time_ms numeric)");
+
+	//execute("set trace_sort=on");
+#if 0
+	execute_test_series("small.ordered_ints");
+	execute_test_series("small.random_ints");
+	execute_test_series("small.ordered_text");
+	execute_test_series("small.random_text");
+#endif
+	
+	execute_test_series("medium.ordered_ints");
+	execute_test_series("medium.random_ints");
+	execute_test_series("medium.ordered_text");
+	execute_test_series("medium.random_text");
+
+#if 0
+	execute_test_series("large.ordered_ints");
+	execute_test_series("large.random_ints");
+	execute_test_series("large.ordered_text");
+	execute_test_series("large.random_text");
+#endif
+	
+	PQfinish(conn);
+
+	return 0;
+}
