"""Tidy the project folder: merge lead CSVs and file everything away.

Running this script from the project root

1. creates the ``leads``, ``leads/raw`` and ``scripts`` directories,
2. merges the known lead CSVs into ``leads/all_leads.csv`` (deduplicated),
3. moves the remaining ``*.py`` files into ``scripts/`` and the raw
   CSV/PDF inputs into ``leads/raw/``.

All paths are relative to the current working directory.
"""
import csv
import glob
import os
import shutil

# Historical file name of this script; always left in place.
_SELF_NAME = "organize_project.py"

_DIRECTORIES = ('leads', 'leads/raw', 'scripts')

# Lead sources in priority order: on a duplicate key the first file wins.
_LEAD_FILES = ('final_leads.csv', 'leads.csv')

_MASTER_PATH = 'leads/all_leads.csv'
_FIELDNAMES = ['Firm/Innung', 'Contact Person', 'Email', 'Region']

_RAW_FILES = (
    'leads_unterfranken.csv',
    'innungen_leads_koeln_duesseldorf.csv',
    'unterfranken.pdf',
    'leads.csv',
    'final_leads.csv',
)


def normalize(text):
    """Return *text* without surrounding whitespace, or "" if it is falsy."""
    return text.strip() if text else ""


def _ensure_directories():
    """Create the target directory layout, reporting each new directory."""
    for d in _DIRECTORIES:
        if not os.path.exists(d):
            os.makedirs(d, exist_ok=True)
            print(f"Created directory: {d}")


def _load_unique_leads():
    """Read every available lead file and return the deduplicated rows.

    A row is identified by its e-mail address, or by its firm name when it
    has no e-mail. Rows with neither are dropped. Each returned row is a
    dict with exactly the ``_FIELDNAMES`` keys.
    """
    all_leads = []
    seen = set()

    for fname in _LEAD_FILES:
        if not os.path.exists(fname):
            continue
        # 'utf-8-sig' strips a leading BOM (otherwise the first header would
        # be '\ufeffFirm/Innung' and never match); newline='' lets the csv
        # module handle line endings inside quoted fields itself.
        with open(fname, 'r', newline='', encoding='utf-8-sig') as f:
            for row in csv.DictReader(f):
                email = normalize(row.get('Email', ''))
                name = normalize(
                    row.get('Firm/Innung') or row.get('Innung', '')
                )

                # Email is the strongest identity; fall back to the name.
                key = email or name
                if not key or key in seen:
                    continue

                seen.add(key)
                all_leads.append({
                    'Firm/Innung': name,
                    'Contact Person': normalize(row.get('Contact Person', '')),
                    'Email': email,
                    'Region': normalize(
                        row.get('Region') or row.get('Source', '')
                    ),
                })

    return all_leads


def _write_master(all_leads):
    """Write *all_leads* to the master CSV; do nothing when it is empty."""
    if not all_leads:
        return
    with open(_MASTER_PATH, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=_FIELDNAMES)
        writer.writeheader()
        writer.writerows(all_leads)
    print(
        f"Successfully created {_MASTER_PATH} "
        f"with {len(all_leads)} unique leads."
    )


def _is_this_script(path):
    """Return True if *path* is this script (by legacy name or location)."""
    if path == _SELF_NAME:
        return True
    this_file = globals().get('__file__')
    if not this_file:
        return False
    return os.path.abspath(path) == os.path.abspath(this_file)


def _move_replacing(src, dst):
    """Move file *src* to *dst*, replacing an existing file at *dst*."""
    if os.path.exists(dst):
        os.remove(dst)
    shutil.move(src, dst)


def _move_scripts():
    """Move every ``*.py`` file in the working directory into ``scripts/``."""
    for py_file in glob.glob("*.py"):
        # Never relocate the running script, whatever it is called.
        if _is_this_script(py_file):
            continue
        _move_replacing(py_file, os.path.join("scripts", py_file))
        print(f"Moved {py_file} to scripts/")


def _move_raw_files():
    """Move the known raw CSV/PDF inputs that exist into ``leads/raw/``."""
    for rf in _RAW_FILES:
        if os.path.exists(rf):
            _move_replacing(rf, os.path.join("leads/raw", rf))
            print(f"Moved {rf} to leads/raw/")


def organize():
    """Create the folder layout, build the master lead list, move files.

    The lead files are read before anything is moved, because the raw-file
    step relocates them into ``leads/raw/``.
    """
    # 1. Create Directories
    _ensure_directories()

    # 2. Consolidate and Deduplicate Leads
    _write_master(_load_unique_leads())

    # 3. Move Files
    _move_scripts()
    _move_raw_files()


if __name__ == "__main__":
    organize()