#!/usr/bin/env python
# tuplesort_test.py : Test the effectiveness of fast-path tuplesorting

# It's intended for this program to have 3 runs, which will be compared.

# run 1 - unaltered HEAD
# run 2 - Patch, but with inline specializations commented out
# run 3 - Patch

# We parse explain analyze output, because it's important to isolate
# query speed from any client overhead

import psycopg2
import subprocess
import json # json explains used
import time
import csv
from optparse import OptionParser

# Statements to benchmark; each is run under "explain (analyze ...)" by
# test_main(), so only server-side execution time is measured.
queries = [
			# Queries from dellstore database
			"select * from orderlines order by prod_id;",
			"select * from orderlines order by prod_id, quantity;" # For "not inlined" codepath, that uses direct comparator for first scanKey.
			]

# Walk explain tree. Should be sufficiently flexible for our purposes
# (i.e not very) - assumes an in-memory sort node is parent node
def walk_tree(tree):
	"""Pull timing and sort-space figures out of a parsed explain tree.

	tree -- the document produced by "explain (analyze, format json)":
	        a list of dictionaries whose "Plan" entry is expected to be
	        the Sort node.

	Returns [actual_total_time_ms, sort_space_kb, False].
	Raises ValueError when the expected attributes are absent (the
	original version died later with an unbound local instead).
	"""
	act_tot_time = None
	sort_space = None
	for od in tree: # outer dictionary
		plan = od.get("Plan")
		if plan is None:
			continue
		for at, val in plan.items():
			if at == "Actual Total Time":
				print(at, ": ", val)
				act_tot_time = float(val)
			elif at == "Sort Space Used":
				print(at, ": ", val)
				sort_space = val
			elif at == "Sort Space Type":
				# The whole point is measuring in-memory fast-path sorts;
				# a disk sort would invalidate the run.
				assert val == "Memory"
			elif at == "Node Type":
				assert val == "Sort"

	if act_tot_time is None or sort_space is None:
		raise ValueError("explain tree lacked expected Sort node attributes")
	return [ act_tot_time, sort_space, False ]

# store results of a given run in a dedicated csv file
def serialize_to_file(vals_dic, filename):
	"""Write one CSV row per recorded query run.

	vals_dic -- despite the name, a sequence of
	            ((query, unix_timestamp), [total_ms, sort_kb, ...]) pairs,
	            already sorted by total time (see test_main).
	filename -- destination csv path; overwritten if it exists.

	The median run (or, for an even number of runs, the nearer entry)
	gets '*' in the last column; every other row gets 'n'.
	"""
	# mark median run for runs of this
	# query (or if there is an even number of elements, near enough)
	median_i = (len(vals_dic) + 1) // 2 - 1  # // keeps this integral on py3

	# with-block closes the handle (the original leaked it); 'w' + newline=''
	# is what the csv module requires on py3 ('wb' was the py2 idiom).
	with open(filename, 'w', newline='') as f:
		wrt = csv.writer(f, delimiter=',')
		for i, ((qry, when), measures) in enumerate(vals_dic):
			wrt.writerow([qry,
					time.ctime(when),
					str(measures[0]) + "ms",
					str(measures[1]) + "kb sort",
					'*' if i == median_i else 'n'])

def test_main():
	"""Drive the benchmark.

	Parses command-line options, connects to the database, runs each
	statement in `queries` under "explain (analyze, format json)"
	(warm-up executions are discarded), then writes per-query timing CSVs
	named "<description> <query>.csv" via serialize_to_file().

	Raises SystemExit when the mandatory --description option is absent.
	"""
	parser = OptionParser(description="")
	parser.add_option('-c', '--conninfo', type=str, help="libpq-style connection info string of database to connect to. "
						"Can be omitted, in which case we get details from our environment. "
						"You'll probably want to put this in double-quotes, like this: --conninfo \"hostaddr=127.0.0.1 port=5432 dbname=postgres user=postgres\". ", default="")
	parser.add_option('-w', '--warmcache', type=int, help="Number of times to execute each statement without recording, to warm the cache.", default=3)
	parser.add_option('-r', '--runs', type=int, help="Number of times to run each query", default=5)
	parser.add_option('-d', '--description', type=str, help="Mandatory description for this run of the tool, such as 'inlining', 'non-inlining' or 'HEAD'", default=None)

	args = parser.parse_args()[0]

	# Fail fast on the mandatory option before touching the database
	# (the original only checked after connecting).
	description = args.description
	if description is None:
		raise SystemExit("You must specify a description for this run of tuplesort_test.py!")

	warm_cache_n = args.warmcache
	run_n = args.runs

	conn = psycopg2.connect(args.conninfo)
	cur = conn.cursor()

	for qry in queries:
		vals = {}
		print(qry)
		for i in range(warm_cache_n + run_n):
			cur.execute("explain (analyze true, costs true, format json) " + qry)
			if i < warm_cache_n:
				print("Skip recording query that just warms cache")
				continue
			# Explain output is a single text column holding the JSON document.
			for row in cur:
				expl_ana_tree = json.loads(row[0])
			vals[( qry, time.time() )] = walk_tree(expl_ana_tree)

		# Sort values for reference, and to locate the median value.
		# Sorting once per query here, not per run as the original did.
		sort_vals = sorted(vals.items(), key=lambda kv: kv[1][0])

		serialize_to_file(sort_vals, description + " " + qry + ".csv")

# Standard entry guard: run the benchmark only when executed as a script,
# not when imported.
if __name__== "__main__":
	test_main()

