#!/usr/bin/env python
import psycopg2
import time

datatypes = {'int4': '%s', 'int8': '%s', 'numeric': '%s', 'text': 'md5(%s::text)'}

tables = [
	['int4', 'int4', 'int4'],
	['int8', 'int8', 'int4'],
	['numeric', 'int4', 'int4'],
	['int4', 'int4', 'numeric'],
	['text', 'int8', 'numeric', 'text'],
	['text', 'int8', 'text', 'int4'],
	['int4', 'int8', 'numeric', 'text'],
	['numeric', 'text', 'int4', 'int8']
]

tableSize = 1000000
groupsCounts = [1000000, 100000, 10000, 1000, 100, 10, 1]
preorderedRates = [0.0, 0.1, 0.5, 0.9, 1.0]
skewFactors = [-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]

con = psycopg2.connect("dbname='postgres' user='smagen' host='/tmp' password='' port=5431")
con.set_isolation_level(0)
con.autocommit = True
queryCursor = con.cursor()
queryCursor.execute("SET work_mem = '2GB';")
queryCursor.execute("SET enable_seqscan = off;")

def queryTime(query):
	start = time.time()
	queryCursor.execute(query)
	end = time.time()
	return (end - start)

def findMedian(query):
	times = [queryTime(query) for i in range (0, 3)]
	times.sort()
	return times[1]

print("Table,GroupedCols,GroupCount,Skew,PreorderedFrac,FullSortMedian,IncSortMedian,TimeChangePercent")
for table in tables:
	for groupedCols in range(1, len(table) - 1):
		for groupsCount in groupsCounts:
			if groupsCount >= tableSize:
				rates = [1.0]
				skews = [0.0]
			elif groupsCount == 1:
				rates = preorderedRates
				skews = [0.0]
			else:
				rates = preorderedRates
				skews = skewFactors
			for preorderedFrac in rates:
				for skew in skews:
					nPreordered = int(tableSize * preorderedFrac)
					clist = ['c' + str(i + 1) + ' ' + table[i] for i in range(0, len(table))]
					tlist = [(datatypes[table[i]] % 'j') for i in range(0, groupedCols)]
					tlist = tlist + [(datatypes[table[i]] % 'i') for i in range(groupedCols, len(table) - 1)]
					tlist.append((datatypes[table[len(table) - 1]] % 'k'))
					orderColList = ['c' + str(i + 1) for i in range(0, groupedCols)]
					orderColList.append('c' + str(len(table)))
					colList = ['c' + str(i + 1) for i in range(0, len(table))]
					if skew == 0.0:
						groupNumCol = "i / " + str(tableSize / groupsCount)
					elif skew > 0.0:
						groupNumCol = "floor(pow(i::float8 / %f, %f) * %f)::int" % (tableSize, 1.0 / (1.0 + skew), groupsCount)
					elif skew < 0.0:
						groupNumCol = "(%d - floor(pow((%d - i)::float8 / %f, %f) * %f)::int)" % (groupsCount - 1, tableSize - 1, tableSize, 1.0 / (1.0 - skew), groupsCount)
					queryCursor.execute("DROP TABLE IF EXISTS tmp")
					queryCursor.execute("CREATE UNLOGGED TABLE tmp (" + ', '.join(clist) + ")")
					queryCursor.execute(
						"INSERT INTO tmp "
						"SELECT " + ', '.join(tlist) + " FROM " +
						"    (SELECT i, " + groupNumCol + " AS j, (CASE WHEN i < " + str(nPreordered) + " THEN i ELSE (random() * " + str(tableSize) + ")::int END) AS k " + 
						"     FROM generate_series(0, " + str(tableSize - 1) + ") i ) x")
					queryCursor.execute("CREATE INDEX tmp_idx ON tmp (" + ', '.join(colList) + ")")
					queryCursor.execute("VACUUM ANALYZE tmp;")
					q = ("SELECT count(*) FROM " +
						 "   (SELECT * FROM tmp ORDER BY " + ', '.join(orderColList) + ") x")
					fullSortMedian = findMedian("SET enable_incrementalsort = off; " + q)
					incSortMedian = findMedian("SET enable_incrementalsort = on; " + q)
					percent = ((incSortMedian - fullSortMedian) / fullSortMedian) * 100.0
					print("%s,%s,%s,%s,%s,%s,%s,%.2f" % ('|'.join(table), groupedCols, groupsCount, skew, preorderedFrac, fullSortMedian, incSortMedian, percent))
