import os

import pandas as pd

# Default locations used when the script is run without arguments.
DEFAULT_MAIN_CSV = 'leads/leads.csv'
DEFAULT_NEW_UNTERFRANKEN_CSV = 'leads/raw/leads_unterfranken_v2.csv'

# Column layout used when the main leads file does not exist yet.
_LEAD_COLUMNS = ["Firm/Innung", "Contact Person", "Email", "Region"]
_REGION = 'Unterfranken'


def _strip_text_columns(df):
    """Return a copy of *df* with surrounding whitespace removed from text cells.

    Only ``str`` values are touched; numbers, NaN and other non-string cells
    in object columns are kept as they are instead of being turned into NaN.
    """
    cleaned = df.copy()
    for col in cleaned.columns:
        values = cleaned[col]
        if pd.api.types.is_object_dtype(values):
            # Object columns may hold mixed types, so strip element-wise.
            cleaned[col] = values.map(
                lambda cell: cell.strip() if isinstance(cell, str) else cell
            )
        elif pd.api.types.is_string_dtype(values):
            # Dedicated string dtypes support the vectorised accessor.
            cleaned[col] = values.str.strip()
    return cleaned


def merge_and_sort_leads(main_csv=DEFAULT_MAIN_CSV,
                         new_unterfranken_csv=DEFAULT_NEW_UNTERFRANKEN_CSV):
    """Replace the Unterfranken rows of the main leads file and re-sort it.

    Reads *main_csv* (or starts from an empty table if it does not exist),
    drops every row whose ``Region`` is ``Unterfranken``, appends all rows of
    *new_unterfranken_csv*, sorts case-insensitively by ``Firm/Innung`` and
    writes the result back to *main_csv*. A short summary is printed.

    Args:
        main_csv: Path of the leads file that is read and then overwritten.
        new_unterfranken_csv: Path of the CSV holding the new Unterfranken
            leads.

    Raises:
        FileNotFoundError: If *new_unterfranken_csv* does not exist.
        KeyError: If the ``Region`` or ``Firm/Innung`` column is missing.
    """
    # Read existing leads
    if os.path.exists(main_csv):
        df_main = pd.read_csv(main_csv)
    else:
        df_main = pd.DataFrame(columns=_LEAD_COLUMNS)

    # Read new Unterfranken leads
    df_uf = pd.read_csv(new_unterfranken_csv)

    # Clean whitespace BEFORE filtering: a stored value such as
    # " Unterfranken " must still be recognised as an old Unterfranken row,
    # otherwise it would survive the filter and be duplicated.
    df_main = _strip_text_columns(df_main)
    df_uf = _strip_text_columns(df_uf)

    # Remove old Unterfranken entries from the main table.
    df_others = df_main[df_main['Region'] != _REGION]

    # Combine
    df_final = pd.concat([df_others, df_uf], ignore_index=True)

    # Sort by Firm/Innung, ignoring case. The cast keeps the key function
    # working when the column is not string-typed (e.g. entirely empty), and
    # the stable sort keeps rows with equal names in a reproducible order.
    df_final = df_final.sort_values(
        by='Firm/Innung',
        key=lambda col: col.astype("string").str.lower(),
        kind="stable",
    )

    # Save
    df_final.to_csv(main_csv, index=False)

    unterfranken_rows = int((df_final['Region'] == _REGION).sum())
    print(f"Merged and sorted. Total rows: {len(df_final)}")
    print(f"Unterfranken rows: {unterfranken_rows}")


if __name__ == "__main__":
    merge_and_sort_leads()