import re

import pandas as pd

# Table definition in a MySQL-style dump: CREATE TABLE `name` ( ... ) ENGINE=...
_CREATE_TABLE_RE = re.compile(r"CREATE TABLE `(\w+)` \((.*?)\) ENGINE", re.DOTALL)
# Head of an INSERT statement. The VALUES list that follows is tokenized by
# hand because a regex cannot tell a ';' or ')' inside a string from a real one.
_INSERT_HEAD_RE = re.compile(r"INSERT INTO `(\w+)` VALUES\s*")
# A column definition starts with its back-ticked name; key/constraint
# clauses start with a keyword instead.
_COLUMN_NAME_RE = re.compile(r"\s*`([^`]+)`")
# Run of characters inside a quoted string that need no special handling.
_PLAIN_RUN_RE = re.compile(r"[^'\\]+")
# Run of characters of an unquoted value (stops at ',' / ')' / a quote).
_BARE_RUN_RE = re.compile(r"[^,)']+")
# Control characters that cannot be stored in an XLSX cell.
_XLSX_ILLEGAL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
# Backslash escapes with a special meaning; any other escaped character
# simply stands for itself (e.g. \' -> ', \\ -> \).
_BACKSLASH_ESCAPES = {"0": "\0", "b": "\b", "n": "\n", "r": "\r", "t": "\t", "Z": "\x1a"}
# Excel sheet names have a 31-character limit.
_MAX_SHEET_NAME = 31


def _read_quoted(text: str, pos: int) -> tuple:
    """Decode a single-quoted SQL string whose opening quote precedes *pos*.

    Handles backslash escapes and doubled quotes (``''``).

    Returns:
        ``(decoded_string, index_just_past_the_closing_quote)``. If the text
        ends before the closing quote, everything read so far is returned.
    """
    end = len(text)
    parts = []
    while pos < end:
        run = _PLAIN_RUN_RE.match(text, pos)
        if run:
            parts.append(run.group())
            pos = run.end()
            continue
        if text[pos] == "\\":
            escaped = text[pos + 1:pos + 2]
            parts.append(_BACKSLASH_ESCAPES.get(escaped, escaped))
            pos += 2
        elif text.startswith("''", pos):
            # A doubled quote is an escaped quote, not the end of the string.
            parts.append("'")
            pos += 2
        else:
            return "".join(parts), pos + 1
    return "".join(parts), end


def _read_value(text: str, pos: int) -> tuple:
    """Read one value of a row tuple starting at *pos*.

    Returns:
        ``(value, index_of_the_terminating_','_or_')')``. Quoted strings are
        returned decoded; unquoted tokens are returned as stripped text;
        ``NULL`` and empty tokens become ``None``.
    """
    end = len(text)
    start = pos
    literal = None
    while pos < end and text[pos] not in ",)":
        if text[pos] == "'":
            chunk, pos = _read_quoted(text, pos + 1)
            # Adjacent literals are concatenated; unquoted text around a
            # literal (e.g. a `_binary` introducer) is not part of the value.
            literal = chunk if literal is None else literal + chunk
        else:
            pos = _BARE_RUN_RE.match(text, pos).end()
    if literal is not None:
        return literal, pos
    token = text[start:pos].strip()
    # Empty tokens keep their position as a missing value so that later
    # values are not shifted into the wrong column.
    return (None if token in ("", "NULL") else token), pos


def _read_row(text: str, pos: int) -> tuple:
    """Read the values of one ``(...)`` tuple; *pos* is just past the ``(``.

    Returns:
        ``(list_of_values, index_just_past_the_closing_')')``.
    """
    end = len(text)
    values = []
    while pos < end:
        value, pos = _read_value(text, pos)
        values.append(value)
        terminator = text[pos:pos + 1]
        pos += 1
        if terminator != ",":
            break
    return values, min(pos, end)


def _read_insert_rows(text: str, pos: int) -> tuple:
    """Read every row tuple of a VALUES list starting at *pos*.

    Returns:
        ``(list_of_rows, index_just_past_the_terminating_';')``.
    """
    end = len(text)
    rows = []
    while pos < end:
        ch = text[pos]
        if ch == "(":
            row, pos = _read_row(text, pos + 1)
            rows.append(row)
        elif ch == ";":
            return rows, pos + 1
        else:
            # Commas and whitespace between tuples.
            pos += 1
    return rows, end


def _split_definitions(body: str) -> list:
    """Split a CREATE TABLE body on commas outside parentheses and strings."""
    pieces = []
    depth = 0
    start = 0
    pos = 0
    end = len(body)
    while pos < end:
        ch = body[pos]
        if ch == "'":
            # Skip quoted text such as COMMENT '...' or enum('a','b') members.
            _, pos = _read_quoted(body, pos + 1)
            continue
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif ch == "," and depth == 0:
            pieces.append(body[start:pos])
            start = pos + 1
        pos += 1
    pieces.append(body[start:])
    return pieces


def _column_names(body: str) -> list:
    """Return the column names declared in a CREATE TABLE body, in order.

    Only definitions that begin with a back-ticked identifier are columns;
    PRIMARY KEY / KEY / UNIQUE KEY / CONSTRAINT clauses are ignored.
    """
    names = []
    for piece in _split_definitions(body):
        match = _COLUMN_NAME_RE.match(piece)
        if match:
            names.append(match.group(1))
    return names


def _unique_sheet_name(table_name: str, taken: set) -> str:
    """Truncate *table_name* to the sheet-name limit, avoiding collisions.

    *taken* holds the lower-cased names already used and is updated in place.
    """
    name = table_name[:_MAX_SHEET_NAME]
    counter = 1
    while name.lower() in taken:
        counter += 1
        suffix = f"_{counter}"
        name = table_name[:_MAX_SHEET_NAME - len(suffix)] + suffix
    taken.add(name.lower())
    return name


def _excel_safe(value):
    """Strip control characters that an XLSX cell cannot hold from strings."""
    if isinstance(value, str):
        return _XLSX_ILLEGAL_RE.sub("", value)
    return value


def sql_dump_to_dataframes(content: str) -> dict:
    """Parse the text of a SQL dump into one DataFrame per populated table.

    Column names come from the ``CREATE TABLE`` statements; rows come from
    every ``INSERT INTO `table` VALUES ...`` statement. Rows of several
    INSERT statements for the same table are combined. INSERTs for tables
    without a CREATE TABLE statement are ignored.

    Args:
        content: Full text of the SQL dump.

    Returns:
        Mapping of table name to DataFrame, in order of first appearance of
        data. Values are strings as written in the dump (quoted strings are
        unescaped); SQL ``NULL`` becomes a missing value. Rows are truncated
        or padded with missing values to match the column count.
    """
    table_columns = {
        table_name: _column_names(body)
        for table_name, body in _CREATE_TABLE_RE.findall(content)
    }

    rows_by_table = {}
    pos = 0
    while True:
        head = _INSERT_HEAD_RE.search(content, pos)
        if head is None:
            break
        # Always consume the whole statement so that text inside its string
        # values can never be mistaken for the start of another INSERT.
        rows, pos = _read_insert_rows(content, head.end())
        table_name = head.group(1)
        if not rows or table_name not in table_columns:
            continue
        width = len(table_columns[table_name])
        fitted = [row[:width] + [None] * (width - len(row)) for row in rows]
        rows_by_table.setdefault(table_name, []).extend(fitted)

    return {
        table_name: pd.DataFrame(rows, columns=table_columns[table_name])
        for table_name, rows in rows_by_table.items()
    }


def parse_sql_to_excel(sql_file_path, excel_file_path):
    """Export the table data of a SQL dump to an Excel workbook.

    Each table that has data is written to its own sheet, without the index.

    Args:
        sql_file_path: Path of the UTF-8 encoded SQL dump to read.
        excel_file_path: Path of the ``.xlsx`` file to write.

    Raises:
        ValueError: If the dump contains no rows for any known table. The
            check happens before the workbook is opened, so no file is
            created in that case.
    """
    with open(sql_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    data_frames = sql_dump_to_dataframes(content)
    if not data_frames:
        raise ValueError(f"No table data found in {sql_file_path}")

    # Save to Excel with each table on a different sheet
    used_sheet_names = set()
    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        for table_name, df in data_frames.items():
            sheet_name = _unique_sheet_name(table_name, used_sheet_names)
            # Decoded strings may contain control characters the writer rejects.
            safe_df = df.apply(lambda column: column.map(_excel_safe))
            safe_df.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"Successfully exported {len(data_frames)} tables to {excel_file_path}")


if __name__ == "__main__":
    # Run the script
    parse_sql_to_excel('1088316_TeamworkPM_Backup_20260325.sql', 'Teamwork_Data.xlsx')