#!/bin/bash

set -e

# Name of the database used by the test.
TESTDB="testcoll"

# Built-in defaults: number of rows to generate and the upper bound passed
# to the random string generator. They are overridden only when the script
# receives exactly two arguments (rows, max code point); any other argument
# count leaves the defaults in place.
rows=40000
max_cp=16383
if (( $# == 2 )); then
    rows=$1
    max_cp=$2
fi
    
# Create the test database named by $TESTDB with UTF8 encoding, using
# template0 as the template. The statement is fed to psql on stdin while
# connected to the "postgres" database.
# Globals: TESTDB (read)
function createdb
{
    local ddl
    ddl="create database \"$TESTDB\" encoding='UTF8' template='template0';"
    psql -d postgres <<<"$ddl"
}


# (Re)build the test fixture inside $TESTDB:
#   - table random_words(wordtext text), created if missing and truncated;
#   - SQL function random_string(max_cp), which concatenates between about
#     5 and 25 characters whose code points are drawn from 1..max_cp+1;
#   - collation "public".<collname>, dropped if present and recreated from
#     the libc locale of the same name;
#   - <rows> freshly generated strings, followed by ANALYZE.
# Globals:   TESTDB (read)
# Arguments: $1 - number of rows to insert
#            $2 - max_cp value handed to random_string()
#            $3 - collation name, also used as the libc locale name
# Returns:   psql's exit status; with ON_ERROR_STOP set, the first failing
#            statement makes psql stop and return non-zero.
function populate_random
{
    local row_count=$1
    local max_codepoint=$2
    local collname=$3

    # The here-document delimiter is quoted so the shell leaves the SQL
    # untouched; values are substituted by psql through its own variables.
    psql -d "$TESTDB" \
        -v ON_ERROR_STOP=1 \
        -v rows="$row_count" \
        -v max_codepoint="$max_codepoint" \
        -v collname="$collname" <<'EOF'
CREATE TABLE IF NOT EXISTS random_words(wordtext text);
TRUNCATE TABLE random_words;

CREATE OR REPLACE FUNCTION random_string(max_cp int)
 RETURNS text
 LANGUAGE sql
AS $function$
  select string_agg(c,'') from (
    select chr(1+(random()*max_cp)::int) as c from generate_series(1,(5+random()*20)::int)
  ) s;
$function$
;

DROP COLLATION IF EXISTS "public".:"collname";

CREATE COLLATION "public".:"collname" (locale=:'collname', provider='libc');

INSERT INTO random_words
  SELECT random_string(:max_codepoint) FROM generate_series(1,:rows);

ANALYZE random_words;
EOF
}


# Sort random_words by wordtext under collation "public".<name> in an
# endless loop, piping each result set through md5sum, and report whenever
# the checksum differs from the one obtained in the previous iteration.
# The iteration counter is printed after every run.
# Globals:   TESTDB (read)
# Arguments: $1 - name of the collation in schema "public"
# Outputs:   "<n> " per iteration, plus "<old> -> <new> (iter=<n>)" on a
#            checksum change, all on stdout
# Returns:   never returns on its own; the loop ends only when the script is
#            interrupted or, under `set -e`, when psql exits non-zero.
function run_sorts
{
    local collation=$1
    local prev=""
    local cksum=""
    local run_count=0

    while true
    do
        # The delimiter is quoted so the shell does not touch the SQL; the
        # collation name reaches the query as the psql variable "collname".
        # ON_ERROR_STOP makes psql return non-zero when the query fails.
        cksum=$(psql -Atq -d "$TESTDB" -v ON_ERROR_STOP=1 -v collname="$collation" <<'EOF'
-- favor parallel scan, even for a small dataset
set min_parallel_table_scan_size to '8kB';
set parallel_tuple_cost to 0.0001;
set parallel_setup_cost to 0;

select * from random_words order by wordtext collate "public".:"collname"
 \g |md5sum
EOF
        )
        # md5sum prints "<hash>  -"; keep only the hash.
        cksum=${cksum%% *}

        run_count=$((run_count + 1))
        printf '%s ' "$run_count"
        if [[ -n "$prev" && "$cksum" != "$prev" ]]; then
            printf '\n%s -> %s (iter=%s)\n' "$prev" "$cksum" "$run_count"
        fi
        prev=$cksum
    done
}

# Windows-style locale name; any valid locale other than C/POSIX will do.
# The same name is used for the collation that is created and for the
# collation the sort loop orders by, so it is defined once.
locale_name="en-US"

# Quoted so that empty values stay in their argument position.
populate_random "$rows" "$max_cp" "$locale_name"
run_sorts "$locale_name"
