# TW-to-Clock/main.py

import re
import pandas as pd

def parse_sql_to_excel(sql_file_path, excel_file_path):
    with open(sql_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find all table structures to get column names
    table_defs = re.findall(r"CREATE TABLE `(\w+)` \((.*?)\) ENGINE", content, re.DOTALL)
    table_columns = {}
    for table_name, columns_str in table_defs:
        # Extract column names from the CREATE TABLE statement
        cols = re.findall(r"`(\w+)`", columns_str.split('PRIMARY KEY')[0])
        table_columns[table_name] = cols

    # Find all INSERT statements
    insert_matches = re.findall(r"INSERT INTO `(\w+)` VALUES (.*?);", content, re.DOTALL)

    data_frames = {}
    for table_name, values_str in insert_matches:
        if table_name not in table_columns:
            continue

        # Clean up the values string to parse it as a list of tuples
        # Note: This is a simplified parser for standard SQL values
        rows = []
        # Splitting rows by ),(
        raw_rows = re.findall(r"\((.*?)\)(?:,|$)", values_str.replace('\n', ''))

        for row in raw_rows:
            # Split values by comma, but respect strings inside single quotes
            parsed_row = re.findall(r"'[^']*'|[^,]+", row)
            # Clean quotes and whitespace
            cleaned_row = [val.strip().strip("'") if val.strip() != 'NULL' else None for val in parsed_row]

            # Ensure the row matches the column length (handles trailing commas/empty values)
            if len(cleaned_row) > len(table_columns[table_name]):
                cleaned_row = cleaned_row[:len(table_columns[table_name])]
            elif len(cleaned_row) < len(table_columns[table_name]):
                cleaned_row.extend([None] * (len(table_columns[table_name]) - len(cleaned_row)))

            rows.append(cleaned_row)

        if rows:
            df = pd.DataFrame(rows, columns=table_columns[table_name])
            data_frames[table_name] = df

    # Save to Excel with each table on a different sheet
    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        for table_name, df in data_frames.items():
            # Excel sheet names have a 31-character limit
            df.to_excel(writer, sheet_name=table_name[:31], index=False)

    print(f"Successfully exported {len(data_frames)} tables to {excel_file_path}")

# Run the export only when this file is executed as a script, so importing the
# module to reuse parse_sql_to_excel does not start a conversion.
if __name__ == "__main__":
    parse_sql_to_excel('1088316_TeamworkPM_Backup_20260325.sql', 'Teamwork_Data.xlsx')