stadtwerke/scripts/organize_project.py

86 lines
2.9 KiB
Python

import os
import shutil
import csv
import glob
def normalize(text):
    """Return *text* without surrounding whitespace.

    Falsy input (``None`` or an empty string) is mapped to ``""`` so callers
    can treat a missing CSV cell and a blank one the same way.
    """
    if not text:
        return ""
    return text.strip()
def _load_unique_leads(lead_files):
    """Read the given lead CSVs and return their rows cleaned and deduplicated.

    Files are read in the order given, so when the same lead occurs in more
    than one file the row from the earliest file wins. Paths that do not
    exist are skipped.

    Args:
        lead_files: CSV paths in priority order.

    Returns:
        A list of dicts with the keys 'Firm/Innung', 'Contact Person',
        'Email' and 'Region', in first-seen order.
    """
    all_leads = []
    seen = set()
    for fname in lead_files:
        if not os.path.exists(fname):
            continue
        # 'utf-8-sig' drops a leading BOM; with plain 'utf-8' the BOM stays
        # attached to the first header name and that column is never found.
        # newline='' is what the csv module requires so that quoted fields
        # containing line breaks are parsed correctly.
        with open(fname, 'r', newline='', encoding='utf-8-sig') as f:
            for row in csv.DictReader(f):
                email = normalize(row.get('Email'))
                # Strip before falling back, so a whitespace-only primary
                # column does not hide a value in the alternate column.
                name = (normalize(row.get('Firm/Innung'))
                        or normalize(row.get('Innung')))
                if not email and not name:
                    continue
                # Key for dedupe: Email is strongest, then Name. Compared
                # case-insensitively so 'A@x.de' and 'a@x.de' are one lead.
                key = (email or name).casefold()
                if key in seen:
                    continue
                seen.add(key)
                all_leads.append({
                    'Firm/Innung': name,
                    'Contact Person': normalize(row.get('Contact Person')),
                    'Email': email,
                    'Region': (normalize(row.get('Region'))
                               or normalize(row.get('Source'))),
                })
    return all_leads


def _move_replacing(src, dst_dir):
    """Move file *src* into *dst_dir*, replacing a same-named file there."""
    dst = os.path.join(dst_dir, os.path.basename(src))
    # Remove an existing target first so the move behaves the same on every
    # platform instead of depending on os.rename() overwrite semantics.
    if os.path.exists(dst):
        os.remove(dst)
    shutil.move(src, dst)


def organize():
    """Tidy the current working directory into leads/ and scripts/ folders.

    All paths are relative to the current working directory. The function:

    1. Creates 'leads', 'leads/raw' and 'scripts' if they are missing.
    2. Merges 'final_leads.csv' and 'leads.csv' (in that priority order)
       into 'leads/all_leads.csv', dropping rows that have neither an email
       nor a name and rows whose email (or, without an email, name) was
       already seen. Nothing is written when no lead is found.
    3. Moves every '*.py' file except 'organize_project.py' into 'scripts/'
       and the known raw CSV/PDF inputs into 'leads/raw/', replacing files
       of the same name that are already there.

    NOTE(review): the master file is rewritten from the two input files
    only; leads from an earlier 'leads/all_leads.csv' are not merged back
    in - confirm that this is intended.
    """
    # 1. Create Directories
    for d in ['leads', 'leads/raw', 'scripts']:
        if not os.path.exists(d):
            os.makedirs(d, exist_ok=True)
            print(f"Created directory: {d}")

    # 2. Consolidate and Deduplicate Leads (files listed in priority order)
    all_leads = _load_unique_leads(['final_leads.csv', 'leads.csv'])

    # Write optimized master file
    if all_leads:
        out_path = 'leads/all_leads.csv'
        fieldnames = ['Firm/Innung', 'Contact Person', 'Email', 'Region']
        with open(out_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_leads)
        print(f"Successfully created {out_path} with {len(all_leads)} unique leads.")

    # 3. Move Files
    # Move Python scripts (but never this script itself)
    for py_file in glob.glob("*.py"):
        if py_file == "organize_project.py":
            continue
        _move_replacing(py_file, "scripts")
        print(f"Moved {py_file} to scripts/")

    # Move raw CSVs and PDFs; this runs after step 2 because two of these
    # files are the inputs of the consolidation.
    raw_files = [
        'leads_unterfranken.csv',
        'innungen_leads_koeln_duesseldorf.csv',
        'unterfranken.pdf',
        'leads.csv',
        'final_leads.csv',
    ]
    for rf in raw_files:
        if os.path.exists(rf):
            _move_replacing(rf, "leads/raw")
            print(f"Moved {rf} to leads/raw/")
if __name__ == "__main__":
    # Only reorganize when run as a script; importing the module must not
    # move any files.
    organize()