"""Extract e-mail leads from a saved SERP result dump and write them to CSV."""

import csv
import json
import os
import re

# The SERP output is stored with a .txt extension, but its content is JSON and
# is parsed with json.load below.
input_file = r'C:\Users\a931627\.gemini\antigravity\brain\6060ab5d-4406-4d40-803f-c8d1df8bb430\.system_generated\steps\141\output.txt'
output_csv = 'cologne_duesseldorf_data/cologne_leads.csv'

# Compiled once at import time; the pattern itself is unchanged.
_EMAIL_RE = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')

# Column order of the generated CSV.
_CSV_FIELDS = ['Firm/Innung', 'Contact', 'Email', 'Phone', 'Region']


def _extract_leads(items):
    """Return one lead dict per distinct e-mail found in the organic items.

    Args:
        items: Iterable of SERP result dicts. Only entries whose ``type`` is
            ``'organic'`` are scanned; their ``title``, ``description`` and
            ``pre_snippet`` texts are searched for e-mail addresses.

    Returns:
        A list of dicts keyed by ``_CSV_FIELDS``, in order of first
        appearance. An address that was already collected is skipped
        (exact, case-sensitive comparison).
    """
    leads = []
    seen_emails = set()  # O(1) duplicate check instead of rescanning `leads`

    for item in items:
        if item.get('type') != 'organic':
            continue

        # `or ''` also covers JSON null, which would otherwise be rendered as
        # the literal text "None" inside the scanned string.
        title = item.get('title') or ''
        desc = item.get('description') or ''
        snippet = item.get('pre_snippet') or ''
        full_text = f"{title} {desc} {snippet}"

        # Prefer the site name, then the domain, then the result title.
        innung_name = item.get('website_name') or item.get('domain') or title

        for email in _EMAIL_RE.findall(full_text):
            email = email.rstrip('.')
            if email in seen_emails:
                continue
            seen_emails.add(email)
            leads.append({
                'Firm/Innung': innung_name,
                'Contact': "N/A",
                'Email': email,
                'Phone': "N/A",
                'Region': 'Köln'
            })

    return leads


def parse_serp(input_path=None, output_path=None):
    """Read a SERP JSON dump, extract e-mail leads and write them to a CSV.

    Args:
        input_path: Path of the JSON file to read. Defaults to the
            module-level ``input_file``.
        output_path: Path of the CSV file to write. Defaults to the
            module-level ``output_csv``. Missing parent directories are
            created.

    Returns:
        None. The leads are written to the CSV and a summary is printed.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Resolve defaults at call time so rebinding the module constants still works.
    src = input_file if input_path is None else input_path
    dst = output_csv if output_path is None else output_path

    with open(src, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # `or []` handles both a missing key and an explicit `"items": null`.
    leads = _extract_leads(data.get('items') or [])

    # open(..., 'w') does not create directories; make sure the target exists.
    out_dir = os.path.dirname(dst)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(dst, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_CSV_FIELDS)
        writer.writeheader()
        writer.writerows(leads)

    print(f"Extracted {len(leads)} leads from Cologne SERP.")
    for lead in leads:
        print(f"{lead['Firm/Innung']}: {lead['Email']}")


if __name__ == "__main__":
    parse_serp()