23 lines
658 B
Python
23 lines
658 B
Python
|
|
import pandas as pd
|
|
|
|
def deduplicate_leads(filepath='leads/leads.csv', column='Firm/Innung'):
    """Remove duplicate rows from a leads CSV and rewrite the file in place.

    Two rows are duplicates when they hold the same value in ``column``.
    The first occurrence of each value is kept and the remaining rows keep
    their original relative order. The number of removed rows is printed to
    stdout, followed by a completion message.

    Note:
        pandas treats missing values as equal to each other, so all rows
        with an empty ``column`` collapse into a single row.

    Args:
        filepath: Path of the CSV file to read and then overwrite.
            Defaults to ``'leads/leads.csv'``.
        column: Name of the column that identifies a lead.
            Defaults to ``'Firm/Innung'``.

    Raises:
        KeyError: If ``column`` is not a column of the CSV file.
    """
    df = pd.read_csv(filepath)
    initial_count = len(df)

    # Keep the first occurrence of each key; later rows with the same key
    # are assumed to be redundant copies of it.
    df_dedup = df.drop_duplicates(subset=[column], keep='first')
    final_count = len(df_dedup)

    print(f"Removed {initial_count - final_count} duplicates.")

    # index=False: do not write the DataFrame's row index as an extra column,
    # so the file keeps the same set of columns it was read with.
    df_dedup.to_csv(filepath, index=False)
    print("Deduplication complete.")
|
|
|
|
# Run the deduplication only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    deduplicate_leads()
|