Files
TW-to-Clock/main.py
T
2026-05-09 10:03:52 +00:00

58 lines
2.5 KiB
Python

import re
import pandas as pd
# Excel limits worksheet names to 31 characters.
_MAX_SHEET_NAME = 31

# Table name and column-definition body of each CREATE TABLE statement.
_CREATE_TABLE_RE = re.compile(r"CREATE TABLE `(\w+)` \((.*?)\) ENGINE", re.DOTALL)

# Start of an INSERT statement that has no explicit column list.
_INSERT_RE = re.compile(r"INSERT INTO `(\w+)` VALUES\s*")

# A back-quoted identifier.
_BACKTICK_NAME_RE = re.compile(r"`([^`]+)`")

# A single-quoted SQL string; group 1 is the still-escaped body. Both
# backslash escapes and doubled quotes ('') are accepted inside the body.
_QUOTED = r"'((?:[^'\\]|\\.|'')*)'"
_QUOTED_RE = re.compile(_QUOTED, re.DOTALL)

# One token of a VALUES list: a quoted string (group 1), a bare literal such
# as a number or NULL (group 2), or a punctuation character (group 3).
_TOKEN_RE = re.compile(_QUOTED + r"|([^,()';]+)|([,();])", re.DOTALL)

# An escape inside a quoted string: backslash + character, or a doubled quote.
_ESCAPE_RE = re.compile(r"\\(.)|''", re.DOTALL)

# Backslash escapes whose meaning is not simply "the character itself".
_ESCAPES = {
    "0": "\0",
    "b": "\b",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "Z": "\x1a",
    "%": "\\%",
    "_": "\\_",
}

# Control characters that cannot be stored in an .xlsx cell.
_ILLEGAL_XLSX_CHARS_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _column_names(body: str) -> list:
    """Return the column names declared in a CREATE TABLE body.

    Only definitions that *start* with a back-quoted identifier are columns;
    index and constraint clauses (``PRIMARY KEY``, ``KEY `idx` (...)``, ...)
    start with a keyword and are skipped. Commas inside parentheses or inside
    quoted DEFAULT/COMMENT text do not separate definitions.
    """
    names = []
    depth = 0
    at_item_start = True
    pos, end = 0, len(body)
    while pos < end:
        ch = body[pos]
        if ch == "'":
            # Jump over quoted text so its commas/parentheses are ignored.
            quoted = _QUOTED_RE.match(body, pos)
            pos = quoted.end() if quoted else end
            at_item_start = False
            continue
        if at_item_start and not ch.isspace():
            at_item_start = False
            name = _BACKTICK_NAME_RE.match(body, pos)
            if name:
                names.append(name.group(1))
                pos = name.end()
                continue
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif ch == "," and depth == 0:
            at_item_start = True
        pos += 1
    return names


def _decode_string(body: str) -> str:
    """Unescape the body of a quoted SQL string and drop xlsx-illegal chars."""

    def replace(match):
        escaped = match.group(1)
        if escaped is None:  # matched the doubled-quote alternative
            return "'"
        return _ESCAPES.get(escaped, escaped)

    return _ILLEGAL_XLSX_CHARS_RE.sub("", _ESCAPE_RE.sub(replace, body))


def _finish_value(text, bare):
    """Build one cell value from the tokens collected between two commas.

    A quoted string wins over any surrounding bare text (e.g. a ``_binary``
    prefix). An empty or ``NULL`` bare literal becomes ``None``; any other
    bare literal is kept as its (stripped) source text.
    """
    if text is not None:
        return text
    raw = "".join(bare).strip()
    if not raw or raw.upper() == "NULL":
        return None
    return raw


def _read_insert_rows(sql: str, pos: int) -> tuple:
    """Parse the ``(...),(...)`` tuples of one INSERT starting at ``sql[pos]``.

    Returns ``(rows, end)`` where ``rows`` is a list of value lists and
    ``end`` is the index just past the terminating top-level ``;`` (or
    ``len(sql)`` if the statement is unterminated). Values are assumed to be
    plain literals; nested parentheses inside a tuple are not supported.
    """
    rows = []
    row = None  # None while between tuples
    text = None  # decoded quoted string of the current value, if any
    bare = []  # unquoted fragments of the current value
    for token in _TOKEN_RE.finditer(sql, pos):
        quoted, literal, punct = token.groups()
        if row is None:
            if punct == "(":
                row, text, bare = [], None, []
            elif punct == ";":
                return rows, token.end()
            continue
        if quoted is not None:
            text = _decode_string(quoted)
        elif literal is not None:
            bare.append(literal)
        elif punct == "," or punct == ")":
            row.append(_finish_value(text, bare))
            text, bare = None, []
            if punct == ")":
                rows.append(row)
                row = None
        else:
            # Stray "(" or ";" inside a tuple: keep it as literal text.
            bare.append(punct)
    return rows, len(sql)


def _fit_row(row: list, width: int) -> list:
    """Truncate or pad ``row`` with ``None`` so it has exactly ``width`` items."""
    return row[:width] + [None] * (width - len(row))


def _sheet_name(table_name: str, taken: set) -> str:
    """Return a sheet name for ``table_name`` that is unique within ``taken``.

    Names are cut to the Excel length limit; when the cut name is already in
    use (compared case-insensitively) a numeric suffix is appended instead of
    letting two tables share one sheet. ``taken`` is updated in place.
    """
    candidate = table_name[:_MAX_SHEET_NAME]
    counter = 1
    while candidate.lower() in taken:
        counter += 1
        suffix = f"_{counter}"
        candidate = table_name[:_MAX_SHEET_NAME - len(suffix)] + suffix
    taken.add(candidate.lower())
    return candidate


def parse_sql_to_excel(sql_file_path: str, excel_file_path: str) -> None:
    """Export the table data of a MySQL dump to an Excel workbook.

    Column names are taken from the ``CREATE TABLE`` statements and rows from
    every ``INSERT INTO `table` VALUES ...`` statement; rows of all INSERT
    statements for the same table are combined. Each table that has at least
    one row is written to its own sheet. All values are written as text, and
    SQL ``NULL`` becomes an empty cell. INSERT statements for tables without
    a matching ``CREATE TABLE`` are ignored.

    Args:
        sql_file_path: Path of the UTF-8 encoded SQL dump to read.
        excel_file_path: Path of the ``.xlsx`` file to create. Nothing is
            written when the dump contains no rows.

    Raises:
        OSError: If the SQL file cannot be read.
    """
    with open(sql_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    table_columns = {
        name: _column_names(body)
        for name, body in _CREATE_TABLE_RE.findall(content)
    }

    # Walk the INSERT statements one by one. Resuming the search only after
    # the end of the previous statement keeps ";" or "INSERT INTO" text that
    # appears inside string data from being mistaken for SQL syntax.
    table_rows = {}
    pos = 0
    while True:
        insert = _INSERT_RE.search(content, pos)
        if insert is None:
            break
        rows, pos = _read_insert_rows(content, insert.end())
        table_name = insert.group(1)
        columns = table_columns.get(table_name)
        if columns is None:
            continue
        width = len(columns)
        # Accumulate: large tables are usually dumped as several INSERTs.
        table_rows.setdefault(table_name, []).extend(
            _fit_row(row, width) for row in rows
        )

    data_frames = {
        name: pd.DataFrame(rows, columns=table_columns[name])
        for name, rows in table_rows.items()
        if rows
    }

    if not data_frames:
        # A workbook needs at least one sheet, so there is nothing to save.
        print(f"No table data found in {sql_file_path}; nothing written to {excel_file_path}")
        return

    # Save to Excel with each table on a different sheet
    used_sheet_names = set()
    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        for table_name, df in data_frames.items():
            sheet = _sheet_name(table_name, used_sheet_names)
            df.to_excel(writer, sheet_name=sheet, index=False)
    print(f"Successfully exported {len(data_frames)} tables to {excel_file_path}")
# Run the export only when this file is executed as a script, so that
# importing the module does not start a conversion as a side effect.
if __name__ == "__main__":
    parse_sql_to_excel('1088316_TeamworkPM_Backup_20260325.sql', 'Teamwork_Data.xlsx')