86 lines
2.9 KiB
Python
86 lines
2.9 KiB
Python
import os
|
|
import shutil
|
|
import csv
|
|
import glob
|
|
|
|
def normalize(text):
    """Return ``text`` with surrounding whitespace removed.

    Any falsy input (``None`` or the empty string) yields the empty string,
    so the result of a ``dict.get`` lookup can be passed straight in.
    """
    if not text:
        return ""
    return text.strip()
|
|
|
|
def _ensure_directories(directories):
    """Create every directory in ``directories`` that does not exist yet."""
    for directory in directories:
        if not os.path.exists(directory):
            os.makedirs(directory)
            print(f"Created directory: {directory}")


def _collect_unique_leads(lead_files):
    """Read ``lead_files`` in order and return the de-duplicated lead rows.

    Files that do not exist are skipped. A row is identified by its email
    when it has one, otherwise by its firm name; rows with neither are
    dropped. The first occurrence of a key wins, so earlier files take
    priority over later ones.

    Returns:
        A list of dicts with the keys 'Firm/Innung', 'Contact Person',
        'Email' and 'Region'.
    """
    unique_leads = []
    seen_keys = set()

    for fname in lead_files:
        if not os.path.exists(fname):
            continue

        # 'utf-8-sig' drops a leading BOM (common in spreadsheet exports) that
        # would otherwise become part of the first header name and break the
        # column lookups below; files without a BOM are read unchanged.
        # newline='' is what the csv module expects so that quoted fields
        # containing line breaks are parsed correctly.
        with open(fname, 'r', newline='', encoding='utf-8-sig') as f:
            for row in csv.DictReader(f):
                email = normalize(row.get('Email', ''))
                name = normalize(row.get('Firm/Innung') or row.get('Innung', ''))

                if not email and not name:
                    continue

                # Email is the strongest identifier, then the name. Emails are
                # compared case-insensitively, and the key is tagged so an
                # email can never collide with a name.
                if email:
                    key = ('email', email.casefold())
                else:
                    key = ('name', name)

                if key in seen_keys:
                    continue
                seen_keys.add(key)

                # Normalize headers: older files use 'Innung' / 'Source'.
                unique_leads.append({
                    'Firm/Innung': name,
                    'Contact Person': normalize(row.get('Contact Person', '')),
                    'Email': email,
                    'Region': normalize(row.get('Region') or row.get('Source', '')),
                })

    return unique_leads


def _write_master_csv(leads, out_path):
    """Write ``leads`` to ``out_path`` as a CSV file with a header row."""
    fieldnames = ['Firm/Innung', 'Contact Person', 'Email', 'Region']
    with open(out_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(leads)


def _move_replacing(filename, dst_dir):
    """Move ``filename`` into ``dst_dir``, replacing a same-named file there."""
    dst = os.path.join(dst_dir, filename)
    # Remove an existing target first so the move behaves the same on every
    # platform instead of depending on os.rename() overwrite semantics.
    if os.path.exists(dst):
        os.remove(dst)
    shutil.move(filename, dst)


def organize():
    """Reorganize the current working directory into a tidy project layout.

    Steps, all relative to the current working directory:

    1. Create the directories 'leads', 'leads/raw' and 'scripts' if missing.
    2. Merge 'final_leads.csv' and 'leads.csv' (in that priority order) into
       'leads/all_leads.csv', dropping duplicate leads. The master file is
       only written when at least one lead was found.
    3. Move every '*.py' file except this script into 'scripts/', and the
       known raw CSV/PDF files into 'leads/raw/'. Existing files with the
       same name at the destination are replaced.

    Progress is reported with ``print``. Returns ``None``.
    """
    # 1. Create Directories
    _ensure_directories(['leads', 'leads/raw', 'scripts'])

    # 2. Consolidate and Deduplicate Leads (files listed in priority order)
    all_leads = _collect_unique_leads(['final_leads.csv', 'leads.csv'])

    # Write optimized master file
    if all_leads:
        out_path = 'leads/all_leads.csv'
        _write_master_csv(all_leads, out_path)
        print(f"Successfully created {out_path} with {len(all_leads)} unique leads.")

    # 3. Move Files
    # Move Python scripts, but never the organizer itself: skip both its
    # conventional name and whatever this file is actually called.
    keep_in_place = {
        "organize_project.py",
        os.path.basename(globals().get('__file__', '')),
    }
    for py_file in glob.glob("*.py"):
        if py_file in keep_in_place:
            continue
        _move_replacing(py_file, "scripts")
        print(f"Moved {py_file} to scripts/")

    # Move raw CSVs and PDFs
    raw_files = [
        'leads_unterfranken.csv',
        'innungen_leads_koeln_duesseldorf.csv',
        'unterfranken.pdf',
        'leads.csv',
        'final_leads.csv',
    ]
    for rf in raw_files:
        if os.path.exists(rf):
            _move_replacing(rf, "leads/raw")
            print(f"Moved {rf} to leads/raw/")
|
|
|
|
# Run the reorganization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    organize()
|