"""Dump the extracted text of every page of a PDF into a raw UTF-8 text file.

Reads ``duesseldorf_innungen.pdf`` from the ``cologne_duesseldorf_data``
directory, extracts the text of each page with pypdf, and writes the pages
(each followed by a newline) to ``duesseldorf_raw.txt`` in the same directory.
Any failure is reported on stdout as ``Error: <message>``.
"""
import pypdf

# Single source of truth for the data directory and file names, so the input
# path, output path, and status message cannot drift apart.
data_dir = 'cologne_duesseldorf_data'
raw_name = 'duesseldorf_raw.txt'
pdf_path = f'{data_dir}/duesseldorf_innungen.pdf'
raw_path = f'{data_dir}/{raw_name}'

try:
    reader = pypdf.PdfReader(pdf_path)

    # Build the dump in one pass: each page's text followed by a newline.
    # str.join avoids the repeated re-allocation of `text += ...` in a loop
    # while producing exactly the same string.
    text = "".join(page.extract_text() + "\n" for page in reader.pages)

    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding.
    with open(raw_path, 'w', encoding='utf-8') as f:
        f.write(text)

    print(f"Dumped {len(text)} characters to {raw_name}")
except Exception as e:
    # Top-level boundary of a one-shot script: report the problem instead of
    # showing a traceback. Covers a missing/unreadable PDF, extraction errors,
    # and failures writing the output file.
    print(f"Error: {e}")