# stadtwerke/scripts/organize_project.py

import os
import shutil
import csv
import glob

def normalize(text):
    """Return *text* with surrounding whitespace removed.

    Any falsy input (``None``, the empty string, ...) yields ``""`` so
    callers can treat missing CSV cells and blank cells the same way.
    """
    if not text:
        return ""
    return text.strip()

def _load_unique_leads(lead_files):
    """Read *lead_files* in order and return normalized, de-duplicated rows.

    Files that do not exist are skipped. Earlier files win when the same
    lead appears more than once, so the list order is a priority order.
    """
    leads = []
    seen = set()

    for fname in lead_files:
        if not os.path.exists(fname):
            continue

        # 'utf-8-sig' transparently drops a leading BOM (common in Excel
        # exports); otherwise the first header would be '\ufeffFirm/Innung'
        # and that column would silently be lost. newline='' is what the
        # csv module requires so quoted embedded newlines parse correctly.
        with open(fname, 'r', newline='', encoding='utf-8-sig') as f:
            for row in csv.DictReader(f):
                # Key for dedupe: Email is strongest, then Name
                email = normalize(row.get('Email', ''))
                name = normalize(row.get('Firm/Innung') or row.get('Innung', ''))

                if not email and not name:
                    continue

                # Compare emails case-insensitively so 'A@x.de' and 'a@x.de'
                # count as the same lead; the stored value keeps its casing.
                key = email.casefold() if email else name
                if key in seen:
                    continue
                seen.add(key)

                # Normalize headers across the differently-shaped inputs
                leads.append({
                    'Firm/Innung': name,
                    'Contact Person': normalize(row.get('Contact Person', '')),
                    'Email': email,
                    'Region': normalize(row.get('Region') or row.get('Source', '')),
                })

    return leads


def _write_leads(out_path, leads):
    """Write *leads* (list of normalized row dicts) to *out_path* as CSV."""
    fieldnames = ['Firm/Innung', 'Contact Person', 'Email', 'Region']
    with open(out_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(leads)


def _move_replacing(src, dst):
    """Move file *src* to *dst*, replacing an existing file at *dst*."""
    # Remove first: whether shutil.move overwrites an existing file depends
    # on the platform's os.rename semantics.
    if os.path.isfile(dst):
        os.remove(dst)
    shutil.move(src, dst)


def organize():
    """Reorganize the project files found in the current working directory.

    Steps, all relative to the current working directory:

    1. Create the ``leads``, ``leads/raw`` and ``scripts`` directories if
       they are missing.
    2. Merge ``final_leads.csv`` and ``leads.csv`` (in that priority order)
       into ``leads/all_leads.csv``, de-duplicating by email, or by name
       when a row has no email. Nothing is written if no leads were found.
    3. Move every ``*.py`` file except ``organize_project.py`` into
       ``scripts/`` and the known raw CSV/PDF inputs into ``leads/raw/``,
       replacing files of the same name already there.

    Progress is reported on stdout. Returns ``None``.
    """
    # 1. Create Directories
    for d in ['leads', 'leads/raw', 'scripts']:
        if not os.path.exists(d):
            os.makedirs(d)
            print(f"Created directory: {d}")

    # 2. Consolidate and Deduplicate Leads
    # Files to load leads from (priority order)
    all_leads = _load_unique_leads(['final_leads.csv', 'leads.csv'])

    # Write optimized master file
    if all_leads:
        out_path = 'leads/all_leads.csv'
        _write_leads(out_path, all_leads)
        print(f"Successfully created {out_path} with {len(all_leads)} unique leads.")

    # 3. Move Files
    # Move Python scripts (but never this script itself)
    for py_file in glob.glob("*.py"):
        if py_file == "organize_project.py":
            continue
        _move_replacing(py_file, os.path.join("scripts", py_file))
        print(f"Moved {py_file} to scripts/")

    # Move raw CSVs and PDFs; this happens after consolidation because the
    # lead CSVs read in step 2 are among the files being moved.
    raw_files = [
        'leads_unterfranken.csv',
        'innungen_leads_koeln_duesseldorf.csv',
        'unterfranken.pdf',
        'leads.csv',
        'final_leads.csv',
    ]

    for rf in raw_files:
        if os.path.exists(rf):
            _move_replacing(rf, os.path.join("leads/raw", rf))
            print(f"Moved {rf} to leads/raw/")

# Run the reorganization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    organize()