#!/usr/bin/env python3

import sys
import re
import argparse

def _quote_csv_field(value: str) -> str:
    """Return *value* wrapped in double quotes, with embedded quotes doubled."""
    # Built by concatenation rather than an f-string: backslash escapes inside
    # an f-string replacement field are a SyntaxError before Python 3.12.
    return '"' + value.replace('"', '""') + '"'


def _fail(message: str) -> None:
    """Write *message* to stderr and terminate the process with exit status 1."""
    sys.stderr.write(message)
    sys.exit(1)


def main() -> None:
    """Convert a log of timed COPY runs on stdin into CSV rows on stdout.

    Every input line that starts with ``copy log from`` must be followed by a
    ``COPY 10000000`` line and then a ``Time: <float> ms`` line. For each such
    group one CSV row ``<format>,<time_ms>[,<extra>...]`` is printed, where the
    extra fields are the positional command-line arguments, each always
    double-quoted with embedded double quotes doubled. Lines outside such a
    group are ignored.

    Any malformed group, or input that ends in the middle of a group, writes a
    diagnostic to stderr and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Parse log data and output CSV with format, time_ms, "
            "and any number of user-provided additional columns."
        )
    )
    parser.add_argument(
        "columns",
        nargs='*',
        help=(
            "Additional columns to include in the output CSV. "
            "Each argument will be added as a separate column with its value."
        )
    )
    args = parser.parse_args()

    # The extra columns are identical for every row, so quote them once up
    # front. With no extra columns this is the empty string.
    extra_suffix = "".join(
        "," + _quote_csv_field(value) for value in args.columns
    )

    # Patterns are case-sensitive on purpose.
    copy_log_pattern = re.compile(
        r"^copy log from '/tmp/log\.(csv|text|raw)' \(format '([^']+)'.*\);$"
    )
    copy_pattern = re.compile(r"^COPY\s+10000000$")
    time_pattern = re.compile(r"^Time:\s+(\d+\.\d+)\s+ms")

    line_number = 0  # 1-based number of the most recently read input line

    try:
        lines = iter(sys.stdin)
        for line in lines:
            line_number += 1
            line = line.strip()

            # Only a "copy log from" line opens a record; skip everything else.
            if not line.startswith("copy log from"):
                continue

            copy_log_match = copy_log_pattern.match(line)
            if not copy_log_match:
                _fail(
                    f"Error: Unable to parse 'copy log from' line at line {line_number}:\n{line}\n"
                )

            format_value = copy_log_match.group(2)  # e.g. 'csv' or 'raw'

            # The row-count line must follow immediately.
            raw_line = next(lines, None)
            if raw_line is None:
                _fail(
                    f"Error: Unexpected end of input after line {line_number}.\n"
                )
            line_number += 1
            copy_line = raw_line.strip()

            if not copy_pattern.match(copy_line):
                _fail(
                    f"Error: Expected 'COPY 10000000' at line {line_number}, but got:\n{copy_line}\n"
                )

            # Then the timing line.
            raw_line = next(lines, None)
            if raw_line is None:
                _fail(
                    f"Error: Unexpected end of input after line {line_number}.\n"
                )
            line_number += 1
            time_line = raw_line.strip()

            time_match = time_pattern.match(time_line)
            if not time_match:
                _fail(
                    f"Error: Unable to parse 'Time' line at line {line_number}:\n{time_line}\n"
                )

            time_ms = time_match.group(1)  # elapsed time in ms, kept as text

            print(f"{format_value},{time_ms}{extra_suffix}")

    except Exception as e:
        # Top-level boundary: report anything unexpected (e.g. a decode error
        # while reading stdin). SystemExit raised by _fail is not an Exception
        # subclass, so it passes through untouched.
        sys.stderr.write(f"An unexpected error occurred: {e}\n")
        sys.exit(1)

# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

