#!/usr/bin/env python
import psycopg2
import time

datatypes = {'int4': '%s', 'int8': '%s', 'numeric': '%s', 'text': 'md5(%s::text)'}

tables = [
	['int4', 'int4', 'int4'],
	['int8', 'int8', 'int4'],
	['numeric', 'int4', 'int4'],
	['int4', 'int4', 'numeric'],
	['text', 'int8', 'numeric', 'text'],
	['text', 'int8', 'text', 'int4'],
	['int4', 'int8', 'numeric', 'text'],
	['numeric', 'text', 'int4', 'int8']
]

tableSize = 1000000
groupSizes = [1, 10, 100, 1000, 10000, 100000, 1000000]
preorderedRates = [0, 0.1, 0.5, 0.9, 1.0]

con = psycopg2.connect("dbname='postgres' user='korotkovae' host='/tmp' password='' port=5429")
con.set_isolation_level(0)
con.autocommit = True
queryCursor = con.cursor()
queryCursor.execute("SET work_mem = '2GB';")
queryCursor.execute("SET enable_seqscan = off;")

def queryTime(query):
	start = time.time()
	queryCursor.execute(query)
	end = time.time()
	return (end - start)

def findMedian(query):
	times = [queryTime(query) for i in range (0, 3)]
	times.sort()
	return times[1]

print("Table,GroupedCols,GroupSize,PreorderedFrac,FullSortMedian,IncSortMedian,TimeChangePercent")
for table in tables:
	for groupedCols in range(1, len(table) - 1):
		for groupSize in groupSizes:
			if groupSize > 1:
				rates = preorderedRates
			else:
				rates = [1.0]
			for preorderedFrac in rates:
				nPreordered = int(tableSize * preorderedFrac)
				clist = ['c' + str(i + 1) + ' ' + table[i] for i in range(0, len(table))]
				tlist = [(datatypes[table[i]] % 'j') for i in range(0, groupedCols)]
				tlist = tlist + [(datatypes[table[i]] % 'i') for i in range(groupedCols, len(table) - 1)]
				tlist.append((datatypes[table[len(table) - 1]] % 'k'))
				orderColList = ['c' + str(i + 1) for i in range(0, groupedCols)]
				orderColList.append('c' + str(len(table)))
				colList = ['c' + str(i + 1) for i in range(0, len(table))]
				queryCursor.execute("DROP TABLE IF EXISTS tmp")
				queryCursor.execute("CREATE UNLOGGED TABLE tmp (" + ', '.join(clist) + ")")
				queryCursor.execute(
					"INSERT INTO tmp "
					"SELECT " + ', '.join(tlist) + " FROM " +
					"    (SELECT i, i / " + str(groupSize) + " AS j, (CASE WHEN i < " + str(nPreordered) + " THEN i ELSE (random() * 1000000)::int END) AS k " + 
					"     FROM generate_series(0, " + str(tableSize - 1) + ") i ) x")
				queryCursor.execute("CREATE INDEX tmp_idx ON tmp (" + ', '.join(colList) + ")")
				queryCursor.execute("VACUUM ANALYZE tmp;")
				q = ("SELECT count(*) FROM " +
					 "   (SELECT * FROM tmp ORDER BY " + ', '.join(orderColList) + ") x")
				fullSortMedian = findMedian("SET enable_incrementalsort = off; " + q)
				incSortMedian = findMedian("SET enable_incrementalsort = on; " + q)
				percent = ((incSortMedian - fullSortMedian) / fullSortMedian) * 100.0
				print("%s,%s,%s,%s,%s,%s,%.2f" % ('|'.join(table), groupedCols, groupSize, preorderedFrac, fullSortMedian, incSortMedian, percent))
