import csv
import json
import os

# Column order used for both output CSV files.
_FIELDNAMES = ['Firm/Innung', 'Contact Person', 'Email', 'Region']

# Region label stamped on every lead taken from the batch 6 result files.
_BATCH6_REGION = "Düsseldorf/Surrounding"

# Placeholder written when no contact person is known for an Innung.
_NO_PERSON = 'N/A'


def normalize_name(name):
    """Return *name* with leading and trailing whitespace removed."""
    return name.strip()


def _read_json(path):
    """Parse the UTF-8 JSON file at *path* and return the decoded object."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _collect_new_leads(path, seen_names, innung_to_person):
    """Build lead rows from one batch result file, skipping known names.

    Each entry of the JSON list at *path* must provide ``'innung'`` and
    ``'email'``.  Names already present in *seen_names* are skipped; every
    accepted name is added to *seen_names* (mutated in place) so later files
    cannot re-introduce it.
    """
    leads = []
    for item in _read_json(path):
        name = normalize_name(item['innung'])
        if name in seen_names:
            continue
        leads.append({
            "Firm/Innung": name,
            "Contact Person": innung_to_person.get(name, _NO_PERSON),
            "Email": item['email'],
            "Region": _BATCH6_REGION,
        })
        seen_names.add(name)
    return leads


def _write_leads(path, leads):
    """Write *leads* to *path* as a UTF-8 CSV with the standard header."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_FIELDNAMES)
        writer.writeheader()
        writer.writerows(leads)


def finalize_leads():
    """Merge the batch 6 results into the lead list and rewrite the CSVs.

    Reads ``leads.csv`` from the current working directory if it exists,
    appends every Innung from the two batch 6 result files whose
    whitespace-stripped name is not already present, and writes the combined
    list to both ``final_leads.csv`` and ``leads.csv``.  Contact persons are
    looked up in ``batch6_targets.json``; a missing, null or empty person is
    recorded as ``'N/A'``.

    Raises:
        FileNotFoundError: if any of the three JSON input files is missing.
        KeyError: if a JSON entry lacks ``'innung'`` (or ``'email'`` for a
            result entry), or ``leads.csv`` has no ``'Firm/Innung'`` column.
        ValueError: raised by ``csv.DictWriter`` if an existing row carries
            a column that is not part of the output header.
    """
    existing_leads = []
    seen_names = set()

    if os.path.exists('leads.csv'):
        # newline='' lets the csv module do its own newline handling, so
        # quoted fields containing line breaks survive the round-trip.
        with open('leads.csv', 'r', newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                existing_leads.append(row)
                seen_names.add(normalize_name(row['Firm/Innung']))

    print(f"Loaded {len(existing_leads)} existing leads.")

    # Load person mapping.  `or` also covers an explicit null/empty person,
    # which a plain .get() default would let through as a blank cell.
    innung_to_person = {}
    for t in _read_json('cologne_duesseldorf_data/batch6_targets.json'):
        innung_to_person[normalize_name(t['innung'])] = (
            t.get('person') or _NO_PERSON
        )

    new_leads = []
    # Batch 6 Part 1, then Part 2: order matters because the first file to
    # mention a name wins.
    for path in (
        'cologne_duesseldorf_data/batch6_results_part1.json',
        'cologne_duesseldorf_data/batch6_results_part2.json',
    ):
        new_leads.extend(_collect_new_leads(path, seen_names, innung_to_person))

    print(f"Added {len(new_leads)} new leads.")

    all_leads = existing_leads + new_leads
    print(f"Total leads: {len(all_leads)}")

    _write_leads('final_leads.csv', all_leads)
    _write_leads('leads.csv', all_leads)


if __name__ == "__main__":
    finalize_leads()