#!/usr/bin/env python
#
# Cross-check node handling code in PostgreSQL looking for bugs.

import re

def scan_node_categories(path):
  """Find all type tags and put them into categories."""
  results = {}
  with open(path) as file:
    current_list = []
    results["INVALID"] = current_list
    for line in file:
      matches = re.search(r"TAGS FOR ([A-Z].*(NODES|STUFF))", line)
      if matches:
        category = matches.group(1)
        current_list = []
        results[category] = current_list
        continue
      matches = re.search(r"(T_[a-zA-Z0-9_]+)", line)
      if matches:
        assert current_list != None
        tag = matches.group(1)
        current_list.append(tag)
  return results

def scan_switch(path, function):
  """Find all cases handled by "function" in translation unit "path"."""
  cases = []
  in_function = False
  with open(path) as file:
    for line in file:
      if line.startswith(function + "("):
        in_function = True
      elif line.startswith("}"):
        in_function = False
      elif in_function:
        matches = re.search(r"case (T_.+):", line)
        if matches:
          cases.append(matches.group(1))
  return cases

def scan_read_tagnames(path):
  """Find all tagnames that readfuncs.c recognizes."""
  results = []
  with open(path) as file:
    for line in file:
      matches = re.search("MATCH\(\"([A-Z]+)\", ([0-9]+)", line)
      if matches:
        tagname = matches.group(1)
        length = int(matches.group(2))
        results.append(tagname)
        if length != len(tagname):
          print "Unexpected length:", line
  return results

def scan_out_tagnames(path):
  """Learn what tagname outfuncs.c uses to output each type."""
  results = {}
  current_tag = None
  with open(path) as file:
    for line in file:
      matches = re.match(r"_out.*, const ([^ ]+) \*", line)
      if matches:
        # see makeNode macro definition: typedef Foo must have enum T_Foo
        current_tag = "T_" + matches.group(1)
      elif line.startswith("}"):
        current_tag = None
      elif current_tag != None:
        matches = re.search("WRITE_NODE_TYPE\(\"([A-Z]+)\"\)", line)
        if matches:
          results[current_tag] = matches.group(1)
  return results

# cases that don't fit the general pattern for out/read/equals but we'll just
# trust that they work
special_cases = [ "T_List", "T_IntList", "T_OidList", "T_Integer", "T_Float", "T_String", "T_BitString", "T_Expr", "T_Null", "T_Value" ]

node_categories = scan_node_categories("src/include/nodes/nodes.h")

copyfuncs = scan_switch("src/backend/nodes/copyfuncs.c", "copyObjectImpl")
equalfuncs = scan_switch("src/backend/nodes/equalfuncs.c", "equal")
outfuncs = scan_switch("src/backend/nodes/outfuncs.c", "outNode")

get_rule_expr = scan_switch("src/backend/utils/adt/ruleutils.c", "get_rule_expr")
exprSetCollation = scan_switch("src/backend/nodes/nodeFuncs.c", "exprSetCollation")
expression_tree_walker = scan_switch("src/backend/nodes/nodeFuncs.c", "expression_tree_walker")
expression_tree_mutator = scan_switch("src/backend/nodes/nodeFuncs.c", "expression_tree_mutator")
raw_expression_tree_walker = scan_switch("src/backend/nodes/nodeFuncs.c", "expression_tree_mutator")

outfuncs_tagname_map = scan_out_tagnames("src/backend/nodes/outfuncs.c")
readfuncs_tagnames = scan_read_tagnames("src/backend/nodes/readfuncs.c")

# the following node categories must be supported by copy, out
for tag in node_categories["PLAN NODES"] + node_categories["PRIMITIVE NODES"] + node_categories["VALUE NODES"] + node_categories["LIST NODES"] + node_categories["PARSE TREE NODES"]:
  if tag in special_cases:
    continue
  if tag not in copyfuncs:
    print "%s not handled in copyfuncs.c" % (tag,)
  if tag not in outfuncs:
    print "%s not handled in outfuncs.c" % (tag,)

# the following node categories must be supported by equals
for tag in node_categories["PRIMITIVE NODES"] + node_categories["VALUE NODES"] + node_categories["LIST NODES"] + node_categories["PARSE TREE NODES"]:
  if tag in special_cases:
    continue
  if tag not in equalfuncs:
    print "%s not handled in equalfuncs.c" % (tag,)

# the following node categories must be supported by parseNodeString using the
# tagname that is output by outfuncs.c
for tag in node_categories["PLAN NODES"] + node_categories["PRIMITIVE NODES"]:
  if tag in outfuncs_tagname_map and outfuncs_tagname_map[tag] not in readfuncs_tagnames:
    print "%s can be produced by outfuncs.c with tagname %s but that tagname is not recognized by readfuncs.c" % (tag, outfuncs_tagname_map[tag])

# the following node categories must be supported by ruleutils.c
for tag in node_categories["PRIMITIVE NODES"]:
  if tag not in get_rule_expr:
    print "%s not handled in ruleutils.c" % (tag,)

# TODO: figure out what the deal is with the various tree walker functions
#for tag in node_categories["PRIMITIVE NODES"] + node_categories["PLAN NODES"] + node_categories["PARSE TREE NODES"]:
#  if tag not in raw_expression_tree_walker:
#    print "%s not handled in raw_expression_tree_walker" % (tag,)
