# Python script, 16 lines, 457 B
import pypdf
|
|
|
|
# Source PDF and the text file that receives its extracted contents.
pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'
output_path = 'cologne_duesseldorf_data/duesseldorf_raw.txt'

# Script-level boundary: any failure while reading the PDF, extracting text,
# or writing the output is reported on stdout instead of raising a traceback.
try:
    reader = pypdf.PdfReader(pdf_path)

    # Build the dump with a single join rather than growing a string with +=
    # inside the loop; each page's text is still followed by one newline.
    text = "".join(page.extract_text() + "\n" for page in reader.pages)

    # The file is only opened after extraction succeeded, so a failed
    # extraction never truncates an existing dump.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(text)

    print(f"Dumped {len(text)} characters to duesseldorf_raw.txt")
except Exception as e:
    print(f"Error: {e}")