stadtwerke/scripts/merge_leads.py

40 lines
1.3 KiB
Python

import pandas as pd
import os
def merge_and_sort_leads(
    main_csv='leads/leads.csv',
    new_unterfranken_csv='leads/raw/leads_unterfranken_v2.csv',
    region='Unterfranken',
):
    """Replace one region's leads in the main CSV with a fresh export.

    Reads the main lead list (an empty table is used when the file does not
    exist yet), drops every row whose ``Region`` equals *region*, appends
    all rows from the new export, sorts the result case-insensitively by
    ``Firm/Innung`` and writes it back to *main_csv*, overwriting it.
    Two summary lines are printed to stdout.

    Args:
        main_csv: Path of the main lead list; read if present, always
            rewritten.
        new_unterfranken_csv: Path of the CSV holding the replacement rows.
        region: Value of the ``Region`` column whose existing rows are
            replaced by the new export.

    Raises:
        FileNotFoundError: If *new_unterfranken_csv* does not exist.
        KeyError: If the main CSV has no ``Region`` column, or the merged
            table has no ``Firm/Innung`` column.
    """
    if os.path.exists(main_csv):
        df_main = pd.read_csv(main_csv)
    else:
        df_main = pd.DataFrame(
            columns=["Firm/Innung", "Contact Person", "Email", "Region"]
        )
    df_new = pd.read_csv(new_unterfranken_csv)

    # Normalise whitespace BEFORE filtering: otherwise an old row whose
    # region is stored as " Unterfranken " survives the filter and ends up
    # duplicated next to the fresh rows once it is stripped.
    df_main = _strip_text_columns(df_main)
    df_new = _strip_text_columns(df_new)

    # Keep everything that does not belong to the region being replaced.
    df_others = df_main[df_main['Region'] != region]

    df_final = pd.concat([df_others, df_new], ignore_index=True)

    # A stable sort keeps rows with equal firm names in a reproducible
    # order, so re-running the script does not shuffle the output file.
    df_final = df_final.sort_values(
        by='Firm/Innung',
        key=lambda col: col.str.lower(),
        kind='mergesort',
    )

    df_final.to_csv(main_csv, index=False)

    region_rows = int((df_final['Region'] == region).sum())
    print(f"Merged and sorted. Total rows: {len(df_final)}")
    print(f"{region} rows: {region_rows}")


def _strip_text_columns(df):
    """Return a copy of *df* with surrounding whitespace removed from string cells."""
    cleaned = df.copy()
    for col in cleaned.columns:
        dtype = cleaned[col].dtype
        is_text = (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
        )
        if is_text:
            # Strip per cell so that non-string values (NaN, numbers in a
            # mixed column) are passed through instead of becoming NaN.
            cleaned[col] = cleaned[col].map(
                lambda value: value.strip() if isinstance(value, str) else value
            )
    return cleaned
if __name__ == "__main__":
    # Executed as a script (not imported): run the merge once with defaults.
    merge_and_sort_leads()