stadtwerke/scripts/finalize_leads.py

79 lines
2.7 KiB
Python

import csv
import json
import os
def normalize_name(name):
    """Return *name* with leading and trailing whitespace removed.

    The result is the key used to decide whether two lead names refer to
    the same entry, so only surrounding whitespace is ignored; case and
    inner spacing are left untouched.
    """
    trimmed = name.strip()
    return trimmed
def _collect_new_leads(results_path, seen_names, innung_to_person, region):
    """Read one batch result file and build rows for names not seen yet.

    Args:
        results_path: Path to a JSON file; it is expected to hold an array
            of objects that each carry an ``innung`` and an ``email`` key.
        seen_names: Set of normalized names already present. It is updated
            in place with every name that produces a new row.
        innung_to_person: Mapping from normalized name to contact person;
            names missing from it get ``'N/A'``.
        region: Value stored in the ``Region`` column of every new row.

    Returns:
        A list of row dicts, in file order, one per previously unseen name.

    Raises:
        KeyError: If an entry has no ``innung`` key, or a not-yet-seen
            entry has no ``email`` key.
    """
    with open(results_path, 'r', encoding='utf-8') as f:
        results = json.load(f)

    leads = []
    for item in results:
        name = normalize_name(item['innung'])
        if name in seen_names:
            # Already in leads.csv or added from an earlier entry/batch.
            continue
        leads.append({
            "Firm/Innung": name,
            "Contact Person": innung_to_person.get(name, 'N/A'),
            "Email": item['email'],
            "Region": region,
        })
        seen_names.add(name)
    return leads


def _write_leads_csv(path, fieldnames, rows):
    """Write *rows* (dicts) to *path* as UTF-8 CSV with a header line."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def finalize_leads(leads_path='leads.csv',
                   data_dir='cologne_duesseldorf_data',
                   final_path='final_leads.csv'):
    """Merge the batch 6 results into the existing leads and write them out.

    Steps:
      1. Load the rows already in *leads_path* (if the file exists) and
         remember their normalized ``Firm/Innung`` names.
      2. Load ``batch6_targets.json`` from *data_dir* to map each name to
         its contact person.
      3. Append every entry of ``batch6_results_part1.json`` and
         ``batch6_results_part2.json`` whose name has not been seen yet.
      4. Write the combined rows to *final_path* and then back to
         *leads_path*, overwriting both.

    Args:
        leads_path: CSV with the existing leads; also rewritten at the end.
        data_dir: Directory that contains the three batch 6 JSON files.
        final_path: Additional CSV that receives the same combined rows.

    Raises:
        FileNotFoundError: If one of the batch 6 JSON files is missing.
        KeyError: If a row of *leads_path* has no ``Firm/Innung`` column or
            a JSON entry lacks a required key.
        ValueError: Raised by ``csv.DictWriter`` if an existing row carries
            a column other than the four written ones.
    """
    region = "Düsseldorf/Surrounding"
    fieldnames = ['Firm/Innung', 'Contact Person', 'Email', 'Region']

    existing_leads = []
    seen_names = set()
    if os.path.exists(leads_path):
        # newline='' lets the csv module handle line endings itself, so
        # quoted fields that contain line breaks are read back intact.
        with open(leads_path, 'r', newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                existing_leads.append(row)
                seen_names.add(normalize_name(row['Firm/Innung']))
    print(f"Loaded {len(existing_leads)} existing leads.")

    # Load person mapping
    targets_path = os.path.join(data_dir, 'batch6_targets.json')
    with open(targets_path, 'r', encoding='utf-8') as f:
        targets = json.load(f)
    innung_to_person = {}
    for t in targets:
        innung_to_person[normalize_name(t['innung'])] = t.get('person', 'N/A')

    # Part 1 is processed before part 2, so on a name clash between the two
    # files the part 1 entry is the one that is kept.
    new_leads = []
    for filename in ('batch6_results_part1.json', 'batch6_results_part2.json'):
        new_leads.extend(_collect_new_leads(
            os.path.join(data_dir, filename),
            seen_names,
            innung_to_person,
            region,
        ))
    print(f"Added {len(new_leads)} new leads.")

    all_leads = existing_leads + new_leads
    print(f"Total leads: {len(all_leads)}")

    # Same content goes to both files; the snapshot is written first so a
    # failure there leaves the existing leads file untouched.
    _write_leads_csv(final_path, fieldnames, all_leads)
    _write_leads_csv(leads_path, fieldnames, all_leads)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    finalize_leads()