#13 use webp instead of jpeg/png

This commit is contained in:
Andreas Knuth 2025-01-30 20:37:52 +01:00
parent e80ff12242
commit 297e2b9489
6 changed files with 561 additions and 129 deletions

2
.gitignore vendored
View File

@ -2,5 +2,7 @@
__pycache__ __pycache__
database.db database.db
debug_images debug_images
images
en_PP-OCRv3*
*.db *.db
*.lock *.lock

62
convert.py Normal file
View File

@ -0,0 +1,62 @@
import os
from PIL import Image
from pathlib import Path
def convert_debug_images(debug_base='debug_images', images_base='images',
                         thumbnail_size=(256, 256), quality=50):
    """Convert every ``original.png`` below *debug_base* to WebP.

    For each directory under *debug_base* that contains an ``original.png``,
    the same relative directory is created under *images_base* and two files
    are written there: ``original.webp`` (full size) and ``thumbnail.webp``
    (shrunk in place to fit *thumbnail_size*).

    Args:
        debug_base: Root directory that is searched recursively.
        images_base: Root directory that receives the converted files.
        thumbnail_size: Maximum ``(width, height)`` of the thumbnail.
        quality: WebP quality used for both output files.

    Returns:
        None. Progress and per-file errors are printed; a failing file does
        not stop the remaining conversions.
    """
    debug_base = Path(debug_base)
    images_base = Path(images_base)

    if not debug_base.exists():
        print(f"Fehler: {debug_base} existiert nicht")
        return

    # Walk all sub-directories of the source tree.
    for root, _dirs, files in os.walk(debug_base):
        # Only directories that actually hold an original.png are mirrored.
        if 'original.png' not in files:
            continue

        current_dir = Path(root)
        dest_dir = images_base / current_dir.relative_to(debug_base)
        dest_dir.mkdir(parents=True, exist_ok=True)

        png_path = current_dir / 'original.png'
        webp_path = dest_dir / 'original.webp'
        thumb_path = dest_dir / 'thumbnail.webp'

        try:
            with Image.open(png_path) as img:
                # Drop the alpha channel before encoding.
                if img.mode in ('RGBA', 'LA'):
                    img = img.convert('RGB')

                # Full-size copy first: thumbnail() below resizes img in place.
                img.save(webp_path, 'WEBP', quality=quality, method=6)
                print(f"Konvertiert: {webp_path}")

                img.thumbnail(thumbnail_size, resample=Image.LANCZOS)
                img.save(thumb_path, 'WEBP', quality=quality, method=6)
                print(f"Thumbnail erstellt: {thumb_path}")
        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Fehler bei {png_path}: {str(e)}")
if __name__ == '__main__':
    # Script entry point: convert everything, then report completion.
    convert_debug_images()
    print("Konvertierung abgeschlossen")

181
ocr_comparison.py Normal file
View File

@ -0,0 +1,181 @@
import os
import sys
import pandas as pd
from paddleocr import PaddleOCR
from PIL import Image
from tqdm import tqdm
import logging
import argparse
# Configure logging: INFO and above goes to stdout and to 'ocr_comparison.log'.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('ocr_comparison.log')
    ]
)
logger = logging.getLogger(__name__)
# Path to the debug_images directory
DEBUG_IMAGES_DIR = 'debug_images'
# File names of the image versions to compare, keyed by version label
IMAGE_FILES = {
    'original': 'original.png',
    'original_compressed': 'original_compressed.jpg',
    'denoised': 'denoised.png'
}
# Initialize PaddleOCR once at module level; perform_ocr() uses this instance.
logger.info("Initialisiere PaddleOCR...")
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en',
    det_db_thresh=0.3,
    det_db_box_thresh=0.3,
    det_db_unclip_ratio=2.0,
    rec_char_type='en',
    det_limit_side_len=960,
    det_limit_type='max',
    use_dilation=True,
    det_db_score_mode='fast',
    show_log=False  # Set to False to suppress PaddleOCR's own log output
)
def perform_ocr(image_path, engine=None):
    """Run OCR on one image and summarise the detections.

    Args:
        image_path: Path of the image handed to the OCR engine.
        engine: Optional object exposing ``ocr(path, rec=..., cls=...)``.
            When omitted, the module-level ``ocr`` instance is used.

    Returns:
        dict with ``'num_texts'`` (number of detected text items) and
        ``'avg_confidence'`` (mean confidence of those items). Both are zero
        when nothing was detected or when the OCR call raised; in the latter
        case the error is logged.
    """
    try:
        reader = ocr if engine is None else engine
        result = reader.ocr(image_path, rec=True, cls=True)

        num_texts = 0
        total_confidence = 0.0
        for line in result or []:
            # A page without any text is reported as None rather than as an
            # empty list; skip it instead of failing on the iteration.
            if not line:
                continue
            for word in line:
                _text, confidence = word[1]
                num_texts += 1
                total_confidence += float(confidence)

        avg_confidence = total_confidence / num_texts if num_texts > 0 else 0.0
        return {'num_texts': num_texts, 'avg_confidence': avg_confidence}
    except Exception as e:
        logger.error(f"Fehler bei OCR für Bild {image_path}: {e}")
        return {'num_texts': 0, 'avg_confidence': 0.0}
def compare_ocr_results(results):
    """Pick the image version with the best OCR metrics.

    Args:
        results: Mapping from version label (the keys of ``IMAGE_FILES``) to a
            dict holding ``'num_texts'`` and ``'avg_confidence'``.

    Returns:
        Tuple ``(best_version, comparison)``. ``comparison`` repeats the two
        metrics per version; ``best_version`` is the label with the highest
        score, the first one winning a tie.
    """
    labels = list(IMAGE_FILES.keys())
    summary = {
        label: {
            'num_texts': results[label]['num_texts'],
            'avg_confidence': results[label]['avg_confidence'],
        }
        for label in labels
    }

    # Score = average confidence plus a small bonus per detected text item;
    # the divisor 100 is the weighting knob.
    winner, top_score = None, -1
    for label in labels:
        metrics = summary[label]
        candidate = metrics['avg_confidence'] + (metrics['num_texts'] / 100)
        if candidate > top_score:
            winner, top_score = label, candidate

    return winner, summary
def parse_arguments():
    """Parse the command line of the comparison script.

    Returns:
        argparse.Namespace whose ``folders`` attribute is the optional
        positional argument (a comma-separated string) or None when omitted.
    """
    cli = argparse.ArgumentParser(
        description='Vergleicht OCR-Ergebnisse verschiedener Bildversionen in debug_images-Ordnern.'
    )
    folders_help = 'Durch Kommata getrennte Liste von Ordner-IDs (max. 10), z.B. 20250112_121938_2172d7b3,20250112_122055_ea9e2a72,20250130_182431_2498fcba'
    cli.add_argument('folders', nargs='?', default=None, help=folders_help)
    return cli.parse_args()
def main():
    """Run the OCR comparison for up to ten debug folders and write a CSV.

    Folder IDs come from the optional comma-separated command-line argument;
    without it, the first ten sub-directories of ``DEBUG_IMAGES_DIR`` (in
    sorted order) are used. For every folder each image listed in
    ``IMAGE_FILES`` is run through OCR, the best version is determined, and
    one row per folder is written to ``ocr_comparison_results.csv``. A summary
    of how often each version won is logged at the end.

    Exits with status 1 when ``DEBUG_IMAGES_DIR`` is missing and folders have
    to be discovered, and with status 0 when there is nothing to save.
    """
    args = parse_arguments()

    if args.folders:
        # Drop empty entries ("a,,b", trailing comma): an empty ID would join
        # to DEBUG_IMAGES_DIR itself and be processed like a run folder.
        folder_ids = [part.strip() for part in args.folders.split(',') if part.strip()]
        if len(folder_ids) > 10:
            logger.warning("Mehr als 10 Ordner-IDs angegeben. Es werden nur die ersten 10 verarbeitet.")
            folder_ids = folder_ids[:10]
    else:
        if not os.path.exists(DEBUG_IMAGES_DIR):
            logger.error(f"Verzeichnis '{DEBUG_IMAGES_DIR}' existiert nicht.")
            sys.exit(1)
        # os.listdir() returns entries in arbitrary order; sort so that
        # "the first 10" is reproducible between runs.
        subdirs = sorted(
            d for d in os.listdir(DEBUG_IMAGES_DIR)
            if os.path.isdir(os.path.join(DEBUG_IMAGES_DIR, d))
        )
        folder_ids = subdirs[:10]
        logger.info(f"Keine Ordner-IDs angegeben. Es werden die ersten {len(folder_ids)} Ordner verarbeitet.")

    logger.info(f"Starte die OCR-Vergleichsanalyse für {len(folder_ids)} Ordner: {', '.join(folder_ids)}")

    # One result row per processed folder.
    results_list = []
    for subdir in tqdm(folder_ids, desc="Verarbeitung der Ordner"):
        subdir_path = os.path.join(DEBUG_IMAGES_DIR, subdir)
        if not os.path.isdir(subdir_path):
            logger.warning(f"Ordner '{subdir}' existiert nicht im '{DEBUG_IMAGES_DIR}' Verzeichnis.")
            continue

        ocr_results = {}
        for version, filename in IMAGE_FILES.items():
            image_path = os.path.join(subdir_path, filename)
            if not os.path.isfile(image_path):
                # A missing image counts as "nothing recognised".
                logger.warning(f"Bild '{filename}' fehlt im Ordner '{subdir}'.")
                ocr_results[version] = {'num_texts': 0, 'avg_confidence': 0.0}
                continue
            ocr_results[version] = perform_ocr(image_path)

        best_version, _comparison = compare_ocr_results(ocr_results)

        # Columns follow IMAGE_FILES order: <version>_num_texts, <version>_avg_confidence.
        row = {'folder_id': subdir, 'best_version': best_version}
        for version in IMAGE_FILES:
            row[f'{version}_num_texts'] = ocr_results[version]['num_texts']
            row[f'{version}_avg_confidence'] = ocr_results[version]['avg_confidence']
        results_list.append(row)

    if not results_list:
        logger.warning("Keine Ergebnisse zum Speichern vorhanden.")
        sys.exit(0)

    # Build a DataFrame and store it as CSV.
    output_csv = 'ocr_comparison_results.csv'
    df = pd.DataFrame(results_list)
    df.to_csv(output_csv, index=False)
    logger.info(f"OCR-Vergleichsanalyse abgeschlossen. Ergebnisse gespeichert in '{output_csv}'.")

    # Log how often each version was judged best.
    total = len(df)
    best_counts = df['best_version'].value_counts()
    logger.info("Zusammenfassung der besten Versionen:")
    for version, count in best_counts.items():
        percentage = (count / total) * 100 if total > 0 else 0
        logger.info(f"{version}: {count} von {total} ({percentage:.2f}%)")
if __name__ == "__main__":
    # Run the comparison only when executed as a script, not on import.
    main()

View File

@ -26,116 +26,84 @@ def get_dir_name():
def create_debug_directory(dir_name): def create_debug_directory(dir_name):
"""Erstellt ein eindeutiges Verzeichnis für Debug-Bilder""" """Erstellt ein eindeutiges Verzeichnis für Debug-Bilder"""
base_dir = 'debug_images' base_dir = 'images'
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
unique_id = str(uuid.uuid4())[:8]
full_path = os.path.join(base_dir, dir_name) full_path = os.path.join(base_dir, dir_name)
# Erstelle Hauptverzeichnis falls nicht vorhanden
if not os.path.exists(base_dir): if not os.path.exists(base_dir):
os.makedirs(base_dir) os.makedirs(base_dir)
# Erstelle spezifisches Verzeichnis für diesen Durchlauf
os.makedirs(full_path) os.makedirs(full_path)
return full_path return full_path
def preprocess_image(image, debug_dir): def preprocess_image(image, debug_dir):
""" """Bildverarbeitung mit optionalen Optimierungen"""
Verarbeitet das Bild und speichert Zwischenergebnisse im angegebenen Verzeichnis,
einschließlich einer komprimierten JPG-Version und eines Thumbnails.
"""
try: try:
# Umwandlung in Graustufen # Graustufenkonvertierung
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Anwendung von CLAHE zur Kontrastverbesserung
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) # Kontrastverbesserung mit CLAHE
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit
enhanced = clahe.apply(gray) enhanced = clahe.apply(gray)
# Rauschunterdrückung
denoised = cv2.fastNlMeansDenoising(enhanced)
# Optional: Binärschwellenwert (auskommentiert)
# _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Speichern der Zwischenergebnisse im spezifischen Verzeichnis # Rauschunterdrückung mit optimierten Parametern
cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray) denoised = cv2.fastNlMeansDenoising(
cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced) enhanced,
cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised) h=15, # Stärkere Rauschreduzierung
# cv2.imwrite(os.path.join(debug_dir, 'binary.png'), binary) templateWindowSize=7,
searchWindowSize=21
)
# Speichern der komprimierten JPG-Version des Originalbildes # Debug-Bilder speichern
compressed_jpg_path = os.path.join(debug_dir, 'original_compressed.jpg') # cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray)
original_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced)
cv2.imwrite(compressed_jpg_path, original_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 80]) # Qualität auf 80 setzen # cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised)
logger.info(f"Komprimiertes Original JPG gespeichert: {compressed_jpg_path}")
# Erstellen und Speichern des Thumbnails # Thumbnail als WebP
thumbnail_path = os.path.join(debug_dir, 'thumbnail.jpg') denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
image_pil = Image.fromarray(denoised) thumbnail = Image.fromarray(denoised_rgb)
image_pil.thumbnail((128, 128)) # Thumbnail-Größe auf 128x128 Pixel setzen thumbnail.thumbnail((256, 256))
image_pil.save(thumbnail_path, 'JPEG') thumbnail_path = os.path.join(debug_dir, 'thumbnail.webp')
logger.info(f"Thumbnail gespeichert: {thumbnail_path}") thumbnail.save(thumbnail_path, 'WEBP', quality=85)
logger.info(f"Debug images saved in: {debug_dir}")
return denoised return denoised
except Exception as e: except Exception as e:
logger.error(f"Preprocessing error: {str(e)}") logger.error(f"Preprocessing error: {str(e)}")
raise raise
@app.route('/api/ocr', methods=['POST']) @app.route('/api/ocr', methods=['POST'])
def ocr_endpoint(): def ocr_endpoint():
debug_dir = None
try: try:
# Erstelle eindeutiges Debug-Verzeichnis für diesen Request # Verzeichnis erstellen
dir_name = get_dir_name() dir_name = get_dir_name()
debug_dir = create_debug_directory(dir_name) debug_dir = create_debug_directory(dir_name)
logger.info(f"Created debug directory: {debug_dir}")
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
data = request.get_json()
if not data or 'image' not in data:
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
# Base64 Dekodierung
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
logger.error(f"Base64 decode error: {str(decode_err)}")
return jsonify({'error': 'Base64 decode error'}), 400
# Bildverarbeitung # Bildverarbeitung
try: data = request.get_json()
image = Image.open(BytesIO(image_data)).convert('RGB') image_data = base64.b64decode(data['image'])
image = np.array(image)
logger.info(f"Image loaded successfully. Shape: {image.shape}")
# Originalbild speichern # Originalbild als WebP speichern
cv2.imwrite(os.path.join(debug_dir, 'original.png'), original_image = Image.open(BytesIO(image_data)).convert('RGB')
cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) webp_path = os.path.join(debug_dir, 'original.webp')
except Exception as img_err: original_image.save(webp_path, 'WEBP', quality=50)
logger.error(f"Image processing error: {str(img_err)}")
return jsonify({'error': 'Invalid image data'}), 400
# Bildvorverarbeitung # WebP-Bild für Verarbeitung laden
processed_image = preprocess_image(image, debug_dir) with open(webp_path, 'rb') as f:
logger.info("Preprocessing completed") webp_image = Image.open(BytesIO(f.read())).convert('RGB')
# PaddleOCR Konfiguration # Vorverarbeitung
processed_image = preprocess_image(np.array(webp_image), debug_dir)
# OCR mit optimierter Konfiguration
ocr = PaddleOCR( ocr = PaddleOCR(
use_angle_cls=True, use_angle_cls=True,
lang='en', lang='en',
det_db_thresh=0.3, det_model_dir='en_PP-OCRv3_det',
det_db_box_thresh=0.3, rec_model_dir='en_PP-OCRv3_rec',
det_db_unclip_ratio=2.0, det_limit_side_len=processed_image.shape[0] * 2,
rec_char_type='en',
det_limit_side_len=960,
det_limit_type='max',
use_dilation=True, use_dilation=True,
det_db_score_mode='fast', det_db_score_mode='fast'
show_log=True
) )
# OCR durchführen # OCR durchführen
@ -206,59 +174,12 @@ def ocr_endpoint():
}), 500 }), 500
except Exception as e: except Exception as e:
logger.error(f"Unexpected error: {str(e)}") logger.error(f"Fehler: {str(e)}")
logger.error(traceback.format_exc())
return jsonify({ return jsonify({
'error': 'Internal server error', 'error': 'Verarbeitungsfehler',
'debug_dir': debug_dir if 'debug_dir' in locals() else None 'details': str(e),
'debug_dir': dir_name if debug_dir else None
}), 500 }), 500
@app.route('/api/debug_image/<name>/<filename>', methods=['GET'])
def get_debug_image(name, filename):
"""
Gibt das angeforderte Bild unter 'debug_images/[name]/[filename]' direkt zurück.
"""
try:
# Sicherheitsmaßnahme: Nur erlaubte Zeichen im Verzeichnisnamen
if not all(c.isalnum() or c in ('_', '-') for c in name):
logger.warning(f"Ungültiger Verzeichnisname angefordert: {name}")
return jsonify({'error': 'Invalid directory name'}), 400
# Sicherheitsmaßnahme: Nur erlaubte Zeichen im Dateinamen
if not all(c.isalnum() or c in ('_', '-', '.',) for c in filename):
logger.warning(f"Ungültiger Dateiname angefordert: {filename}")
return jsonify({'error': 'Invalid file name'}), 400
# Vollständigen Pfad zum Bild erstellen
image_path = os.path.join('debug_images', name, filename)
# Überprüfen, ob die Datei existiert
if not os.path.isfile(image_path):
logger.warning(f"Bild nicht gefunden: {image_path}")
return jsonify({'error': 'Image not found'}), 404
# Bestimmen des MIME-Typs basierend auf der Dateiendung
mime_type = 'image/png' # Standard-MIME-Typ
if filename.lower().endswith('.jpg') or filename.lower().endswith('.jpeg'):
mime_type = 'image/jpeg'
elif filename.lower().endswith('.gif'):
mime_type = 'image/gif'
elif filename.lower().endswith('.bmp'):
mime_type = 'image/bmp'
elif filename.lower().endswith('.tiff') or filename.lower().endswith('.tif'):
mime_type = 'image/tiff'
return send_file(
image_path,
mimetype=mime_type,
as_attachment=False
)
except Exception as e:
logger.error(f"Fehler beim Abrufen des Bildes '{name}/{filename}': {str(e)}")
logger.error(traceback.format_exc())
return jsonify({'error': 'Failed to retrieve image'}), 500
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False) app.run(host='0.0.0.0', port=5000, debug=False)

264
ocr_server_old.py Normal file
View File

@ -0,0 +1,264 @@
from flask import Flask, request, jsonify, send_file
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2
import logging
import os
import uuid
import datetime
# Log everything from DEBUG upwards, prefixed with timestamp and level.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Flask application that the route decorators in this module attach to.
app = Flask(__name__)
def get_dir_name():
    """Return a directory name of the form ``YYYYmmdd_HHMMSS_<8 chars>``.

    The first part is the current local time, the suffix is the first eight
    characters of a freshly generated UUID4.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    suffix = str(uuid.uuid4())[:8]
    return '_'.join((stamp, suffix))
def create_debug_directory(dir_name):
    """Create ``debug_images/<dir_name>`` for one run and return its path.

    Args:
        dir_name: Name of the run directory below ``debug_images``.

    Returns:
        The relative path of the newly created directory.

    Raises:
        FileExistsError: If the run directory already exists, so two runs
            never share a directory.
    """
    base_dir = 'debug_images'
    full_path = os.path.join(base_dir, dir_name)

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(base_dir, exist_ok=True)

    # Deliberately without exist_ok: the run directory must be new.
    os.makedirs(full_path)
    return full_path
def preprocess_image(image, debug_dir):
    """Preprocess an image for OCR and store every intermediate result.

    The image is converted from RGB to grayscale, contrast-enhanced with
    CLAHE and denoised. The three stages are written to *debug_dir* as PNG,
    together with a JPEG (quality 80) of the input image and a JPEG thumbnail
    of the denoised image that fits into 128x128 pixels.

    Args:
        image: Input image array; it is converted with ``COLOR_RGB2GRAY``, so
            RGB channel order is expected.
        debug_dir: Existing directory that receives the files.

    Returns:
        The denoised grayscale image.

    Raises:
        Exception: Any error raised during processing is logged and re-raised.
    """
    try:
        # Grayscale -> CLAHE contrast enhancement -> noise reduction.
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(gray)
        denoised = cv2.fastNlMeansDenoising(enhanced)

        # Persist the intermediate stages in the run directory.
        stages = (
            ('gray.png', gray),
            ('enhanced.png', enhanced),
            ('denoised.png', denoised),
        )
        for file_name, stage in stages:
            cv2.imwrite(os.path.join(debug_dir, file_name), stage)

        # Compressed JPEG copy of the input image (OpenCV writes BGR).
        compressed_jpg_path = os.path.join(debug_dir, 'original_compressed.jpg')
        jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 80]
        cv2.imwrite(compressed_jpg_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR), jpeg_params)
        logger.info(f"Komprimiertes Original JPG gespeichert: {compressed_jpg_path}")

        # Thumbnail of the denoised image, at most 128x128 pixels.
        thumbnail_path = os.path.join(debug_dir, 'thumbnail.jpg')
        preview = Image.fromarray(denoised)
        preview.thumbnail((128, 128))
        preview.save(thumbnail_path, 'JPEG')
        logger.info(f"Thumbnail gespeichert: {thumbnail_path}")

        logger.info(f"Debug images saved in: {debug_dir}")
        return denoised
    except Exception as e:
        logger.error(f"Preprocessing error: {str(e)}")
        raise
@app.route('/api/ocr', methods=['POST'])
def ocr_endpoint():
    """Handle ``POST /api/ocr``: run OCR on a base64-encoded image.

    Expects a JSON object with an ``image`` key holding the base64 data.
    A fresh debug directory is created per request; the original image, the
    preprocessing stages, the raw OCR result and a statistics file are
    written into it.

    Returns:
        A ``(response, status)`` pair or a plain response:
        * 400 for a non-JSON request, a missing ``image`` key, undecodable
          base64 or data that cannot be opened as an image,
        * 200 with a ``warning`` when OCR found no text,
        * 200 with ``status`` and ``results`` (box, text, confidence, name)
          on success,
        * 500 when OCR or any other step fails unexpectedly.
    """
    # Bound before the try block so the error handler can always report it.
    debug_dir = None
    try:
        # Unique debug directory for this request.
        dir_name = get_dir_name()
        debug_dir = create_debug_directory(dir_name)
        logger.info(f"Created debug directory: {debug_dir}")

        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        # silent=True: a malformed body yields None and is answered with the
        # 400 below instead of raising into the catch-all 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']

        # Base64 decoding
        try:
            image_data = base64.b64decode(image_b64)
        except Exception as decode_err:
            logger.error(f"Base64 decode error: {str(decode_err)}")
            return jsonify({'error': 'Base64 decode error'}), 400

        # Load the image and keep a copy of the original
        try:
            image = Image.open(BytesIO(image_data)).convert('RGB')
            image = np.array(image)
            logger.info(f"Image loaded successfully. Shape: {image.shape}")

            cv2.imwrite(os.path.join(debug_dir, 'original.png'),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
        except Exception as img_err:
            logger.error(f"Image processing error: {str(img_err)}")
            return jsonify({'error': 'Invalid image data'}), 400

        # Preprocessing
        processed_image = preprocess_image(image, debug_dir)
        logger.info("Preprocessing completed")

        # NOTE(review): the engine is constructed on every request; presumably
        # this repeats model setup each time - confirm and consider sharing one.
        ocr = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            det_db_thresh=0.3,
            det_db_box_thresh=0.3,
            det_db_unclip_ratio=2.0,
            rec_char_type='en',
            det_limit_side_len=960,
            det_limit_type='max',
            use_dilation=True,
            det_db_score_mode='fast',
            show_log=True
        )

        # Run OCR
        try:
            result = ocr.ocr(processed_image, rec=True, cls=True)

            # Keep the raw result for debugging.
            with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w', encoding='utf-8') as f:
                f.write(f"Raw OCR result:\n{result}\n\n")

            if not result:
                logger.warning("No results returned from OCR")
                return jsonify({
                    'warning': 'No text detected',
                    'debug_dir': debug_dir
                }), 200

            if not result[0]:
                logger.warning("Empty results list from OCR")
                return jsonify({
                    'warning': 'Empty results list',
                    'debug_dir': debug_dir
                }), 200

            # Convert the detections; a malformed item is logged and skipped.
            extracted_results = []
            for idx, item in enumerate(result[0]):
                try:
                    box = item[0]
                    text = item[1][0] if item[1] else ''
                    confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0

                    extracted_results.append({
                        'box': box,
                        'text': text,
                        'confidence': confidence,
                        'name': dir_name
                    })
                except Exception as proc_err:
                    logger.error(f"Error processing result {idx}: {str(proc_err)}")

            # Write statistics next to the debug images.
            with open(os.path.join(debug_dir, 'statistics.txt'), 'w', encoding='utf-8') as f:
                f.write(f"Total results: {len(extracted_results)}\n")
                if extracted_results:
                    avg_confidence = np.mean([r['confidence'] for r in extracted_results])
                    f.write(f"Average confidence: {avg_confidence}\n")
                    f.write("\nDetailed results:\n")
                    for idx, entry in enumerate(extracted_results):
                        f.write(f"Result {idx+1}:\n")
                        f.write(f"Text: {entry['text']}\n")
                        f.write(f"Confidence: {entry['confidence']}\n")
                        f.write(f"Name: {dir_name}\n")
                        f.write(f"Box coordinates: {entry['box']}\n\n")

            return jsonify({
                'status': 'success',
                'results': extracted_results,
            })

        except Exception as ocr_err:
            logger.error(f"OCR processing error: {str(ocr_err)}")
            logger.error(traceback.format_exc())
            return jsonify({
                'error': 'OCR processing failed',
                'details': str(ocr_err),
                'debug_dir': debug_dir
            }), 500

    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            'error': 'Internal server error',
            'debug_dir': debug_dir
        }), 500
@app.route('/api/debug_image/<name>/<filename>', methods=['GET'])
def get_debug_image(name, filename):
    """Serve the file ``debug_images/<name>/<filename>`` inline.

    Args:
        name: Run directory; only alphanumerics, ``_`` and ``-`` are accepted.
        filename: File inside that directory; additionally ``.`` is accepted.

    Returns:
        The file with a MIME type derived from its extension (PNG when the
        extension is not recognised), or a JSON error with status 400
        (invalid name), 404 (file missing) or 500 (unexpected failure).
    """
    try:
        def has_only(text, extra):
            # True when every character is alphanumeric or listed in extra.
            return all(ch.isalnum() or ch in extra for ch in text)

        # Safety measure: restrict the characters of both path components.
        if not has_only(name, {'_', '-'}):
            logger.warning(f"Ungültiger Verzeichnisname angefordert: {name}")
            return jsonify({'error': 'Invalid directory name'}), 400

        if not has_only(filename, {'_', '-', '.'}):
            logger.warning(f"Ungültiger Dateiname angefordert: {filename}")
            return jsonify({'error': 'Invalid file name'}), 400

        image_path = os.path.join('debug_images', name, filename)
        if not os.path.isfile(image_path):
            logger.warning(f"Bild nicht gefunden: {image_path}")
            return jsonify({'error': 'Image not found'}), 404

        # Extension -> MIME type; anything unknown is served as PNG.
        known_types = (
            (('.jpg', '.jpeg'), 'image/jpeg'),
            (('.gif',), 'image/gif'),
            (('.bmp',), 'image/bmp'),
            (('.tiff', '.tif'), 'image/tiff'),
        )
        lowered = filename.lower()
        mime_type = 'image/png'
        for suffixes, candidate in known_types:
            if lowered.endswith(suffixes):
                mime_type = candidate
                break

        return send_file(image_path, mimetype=mime_type, as_attachment=False)

    except Exception as e:
        logger.error(f"Fehler beim Abrufen des Bildes '{name}/{filename}': {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({'error': 'Failed to retrieve image'}), 500
if __name__ == '__main__':
    # Start the Flask server on all interfaces, port 5000, without debug mode.
    app.run(host='0.0.0.0', port=5000, debug=False)

View File

@ -5,3 +5,5 @@ numpy>=1.24.4,<2.0.0
opencv-python==4.6.0.66 opencv-python==4.6.0.66
paddlepaddle==2.6.2 paddlepaddle==2.6.2
werkzeug<2.3 werkzeug<2.3
pandas>=1.3.0
tqdm>=4.64.0