58 lines
2.5 KiB
Python
58 lines
2.5 KiB
Python
import re
|
|
import pandas as pd
|
|
|
|
# Excel limits worksheet titles to 31 characters.
_MAX_SHEET_NAME = 31

_CREATE_TABLE_RE = re.compile(
    r"CREATE TABLE\s+(?:IF NOT EXISTS\s+)?`([^`]+)`\s*\((.*?)\)\s*ENGINE",
    re.DOTALL,
)
# Group 1: table name.  Group 2: optional explicit column list.
_INSERT_RE = re.compile(
    r"INSERT\s+INTO\s+`([^`]+)`\s*(?:\(([^)]*)\)\s*)?VALUES\s*"
)
_IDENTIFIER_RE = re.compile(r"`([^`]+)`")
_LEADING_IDENTIFIER_RE = re.compile(r"\s*`([^`]+)`")
_WHITESPACE_RE = re.compile(r"\s*")
# A single-quoted SQL literal, allowing backslash escapes and doubled quotes.
_STRING_RE = re.compile(r"'[^'\\]*(?:(?:\\.|'')[^'\\]*)*'", re.DOTALL)
# An unquoted value (number, NULL, 0x..., NOW()); may match the empty string.
_BARE_RE = re.compile(r"[^,()']*(?:\([^()']*\)[^,()']*)*")
_ESCAPE_RE = re.compile(r"\\(.)|''", re.DOTALL)
_MYSQL_ESCAPES = {
    "0": "\0",
    "b": "\b",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "Z": "\x1a",
}
# Control characters that openpyxl refuses to write into a cell.
_ILLEGAL_XLSX_CHARS_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
_INVALID_SHEET_CHARS_RE = re.compile(r"[\[\]:*?/\\]")


def _split_top_level(body):
    """Split *body* on commas that are outside parentheses and quotes."""
    parts = []
    depth = 0
    quote = None
    start = 0
    i = 0
    end = len(body)
    while i < end:
        ch = body[i]
        if quote is not None:
            if ch == "\\" and quote != "`":
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in "'\"`":
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif ch == "," and depth == 0:
            parts.append(body[start:i])
            start = i + 1
        i += 1
    parts.append(body[start:])
    return parts


def _parse_table_columns(content):
    """Map each CREATE TABLE name to its ordered list of column names."""
    table_columns = {}
    for table_name, body in _CREATE_TABLE_RE.findall(content):
        columns = []
        for definition in _split_top_level(body):
            # Column definitions start with a backticked name; key and
            # constraint clauses start with a keyword and are skipped.
            match = _LEADING_IDENTIFIER_RE.match(definition)
            if match:
                columns.append(match.group(1))
        table_columns[table_name] = columns
    return table_columns


def _decode_string(body):
    """Decode the inside of a quoted SQL literal into plain text."""

    def replace(match):
        escaped = match.group(1)
        if escaped is None:  # a doubled quote ('')
            return "'"
        return _MYSQL_ESCAPES.get(escaped, escaped)

    if "\\" in body or "''" in body:
        body = _ESCAPE_RE.sub(replace, body)
    return _ILLEGAL_XLSX_CHARS_RE.sub("", body)


def _read_string(content, pos):
    """Read the quoted literal opening at *pos*; return (text, next_pos)."""
    match = _STRING_RE.match(content, pos)
    if match is None:
        # Unterminated literal: take the rest of the input instead of failing.
        return _decode_string(content[pos + 1:]), len(content)
    return _decode_string(match.group()[1:-1]), match.end()


def _parse_tuple(content, pos):
    """Parse one row starting just after its '('; return (values, next_pos)."""
    values = []
    end = len(content)
    while pos < end:
        pos = _WHITESPACE_RE.match(content, pos).end()
        if pos < end and content[pos] == "'":
            value, pos = _read_string(content, pos)
        else:
            match = _BARE_RE.match(content, pos)
            token = match.group().strip()
            pos = match.end()
            if pos < end and content[pos] == "'":
                # Introducer such as _binary 'data': the literal is the value.
                value, pos = _read_string(content, pos)
            elif not token or token.upper() == "NULL":
                value = None
            else:
                value = token
        values.append(value)
        pos = _WHITESPACE_RE.match(content, pos).end()
        if pos >= end:
            break
        delimiter = content[pos]
        pos += 1  # always consume the delimiter so the loop makes progress
        if delimiter == ")":
            return values, pos
    return values, end


def _parse_insert_rows(content, pos):
    """Parse the rows of one INSERT from *pos*; return (rows, next_pos)."""
    rows = []
    end = len(content)
    while pos < end:
        ch = content[pos]
        pos += 1
        if ch == ";":  # a ';' seen here is outside every quoted value
            break
        if ch == "(":
            row, pos = _parse_tuple(content, pos)
            rows.append(row)
    return rows, pos


def _align_row(values, columns, insert_columns):
    """Return *values* arranged to match *columns*, padded with None."""
    if insert_columns:
        named = dict(zip(insert_columns, values))
        return [named.get(column) for column in columns]
    width = len(columns)
    return values[:width] + [None] * (width - len(values))


def _unique_sheet_name(table_name, used):
    """Return a valid sheet title for *table_name* that is not in *used*."""
    cleaned = _INVALID_SHEET_CHARS_RE.sub("_", table_name)
    base = cleaned[:_MAX_SHEET_NAME] or "Sheet"
    candidate = base
    counter = 2
    # Excel compares sheet titles case-insensitively.
    while candidate.lower() in used:
        suffix = f"_{counter}"
        candidate = base[:_MAX_SHEET_NAME - len(suffix)] + suffix
        counter += 1
    used.add(candidate.lower())
    return candidate


def parse_sql_to_excel(sql_file_path, excel_file_path):
    """Export the table data of a MySQL dump to an Excel workbook.

    Column names come from the ``CREATE TABLE ... ENGINE`` statements and
    rows from the ``INSERT INTO ... VALUES`` statements. Every table that
    has at least one row becomes one worksheet.

    Rows from all INSERT statements of a table are accumulated. Quoted
    values may contain commas, parentheses, semicolons and escaped quotes.
    Values are kept as strings, and ``NULL`` becomes an empty cell. Rows
    shorter or longer than the table definition are padded or truncated.
    INSERT statements for tables without a CREATE TABLE are ignored.

    Args:
        sql_file_path: Path of the UTF-8 encoded SQL dump to read.
        excel_file_path: Path of the ``.xlsx`` workbook to write.

    Returns:
        A dict mapping each exported table name to its DataFrame. It is
        empty, and no workbook is written, when the dump has no table data.
    """
    with open(sql_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    table_columns = _parse_table_columns(content)

    table_rows = {}
    pos = 0
    while True:
        statement = _INSERT_RE.search(content, pos)
        if statement is None:
            break
        # Resume after the parsed statement so that SQL-looking text inside
        # a value is never mistaken for another statement.
        rows, pos = _parse_insert_rows(content, statement.end())
        table_name = statement.group(1)
        columns = table_columns.get(table_name)
        if columns is None:
            continue
        insert_columns = _IDENTIFIER_RE.findall(statement.group(2) or "")
        table_rows.setdefault(table_name, []).extend(
            _align_row(row, columns, insert_columns) for row in rows
        )

    data_frames = {
        table_name: pd.DataFrame(rows, columns=table_columns[table_name])
        for table_name, rows in table_rows.items()
        if rows
    }
    if not data_frames:
        # A workbook needs at least one sheet, so there is nothing to save.
        print(f"No table data found in {sql_file_path}; nothing was written")
        return data_frames

    # Save to Excel with each table on a different sheet
    used_sheet_names = set()
    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        for table_name, df in data_frames.items():
            sheet_name = _unique_sheet_name(table_name, used_sheet_names)
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"Successfully exported {len(data_frames)} tables to {excel_file_path}")
    return data_frames
|
|
|
|
# Module-level invocation: runs the export as soon as this file is loaded,
# reading the SQL dump and writing the workbook named below.
parse_sql_to_excel(
    sql_file_path='1088316_TeamworkPM_Backup_20260325.sql',
    excel_file_path='Teamwork_Data.xlsx',
)
|