From 297e2b9489d48fc600f3d6c38a6bf6a1c139918a Mon Sep 17 00:00:00 2001 From: Andreas Knuth Date: Thu, 30 Jan 2025 20:37:52 +0100 Subject: [PATCH] #13 use webp instead of jpeg/png --- .gitignore | 2 + convert.py | 62 +++++++++++ ocr_comparison.py | 181 +++++++++++++++++++++++++++++++ ocr_server.py | 177 +++++++++---------------------- ocr_server_old.py | 264 ++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 4 +- 6 files changed, 561 insertions(+), 129 deletions(-) create mode 100644 convert.py create mode 100644 ocr_comparison.py create mode 100644 ocr_server_old.py diff --git a/.gitignore b/.gitignore index 0f7769b..b1b6fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,7 @@ __pycache__ database.db debug_images +images +en_PP-OCRv3* *.db *.lock \ No newline at end of file diff --git a/convert.py b/convert.py new file mode 100644 index 0000000..4dd7b39 --- /dev/null +++ b/convert.py @@ -0,0 +1,62 @@ +import os +from PIL import Image +from pathlib import Path + +def convert_debug_images(): + debug_base = Path('debug_images') + images_base = Path('images') + + if not debug_base.exists(): + print(f"Fehler: {debug_base} existiert nicht") + return + + # Alle Unterordner in debug_images durchlaufen + for root, dirs, files in os.walk(debug_base): + current_dir = Path(root) + relative_path = current_dir.relative_to(debug_base) + dest_dir = images_base / relative_path + + # Prüfen ob original.png existiert + if 'original.png' not in files: + continue + + # Zielverzeichnis erstellen + dest_dir.mkdir(parents=True, exist_ok=True) + + # Pfade definieren + png_path = current_dir / 'original.png' + webp_path = dest_dir / 'original.webp' + thumb_path = dest_dir / 'thumbnail.webp' + + try: + # Originalbild öffnen und konvertieren + with Image.open(png_path) as img: + # Konvertierung zu RGB falls notwendig + if img.mode in ('RGBA', 'LA'): + img = img.convert('RGB') + + # Original als WebP speichern + img.save( + webp_path, + 'WEBP', + quality=50, + method=6 # Qualitätsoptimierung + ) + print(f"Konvertiert: {webp_path}") + + # Thumbnail erstellen + img.thumbnail((256, 256), resample=Image.LANCZOS) + img.save( + thumb_path, + 'WEBP', + quality=50, + method=6 + ) + print(f"Thumbnail erstellt: {thumb_path}") + + except Exception as e: + print(f"Fehler bei {png_path}: {str(e)}") + +if __name__ == '__main__': + convert_debug_images() + print("Konvertierung abgeschlossen") \ No newline at end of file diff --git a/ocr_comparison.py b/ocr_comparison.py new file mode 100644 index 0000000..e9a4cc1 --- /dev/null +++ b/ocr_comparison.py @@ -0,0 +1,181 @@ +import os +import sys +import pandas as pd +from paddleocr import PaddleOCR +from PIL import Image +from tqdm import tqdm +import logging +import argparse + +# Konfiguriere das Logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler('ocr_comparison.log') + ] +) +logger = logging.getLogger(__name__) + +# Pfad zum debug_images-Verzeichnis +DEBUG_IMAGES_DIR = 'debug_images' + +# Bilddateinamen, die verglichen werden sollen +IMAGE_FILES = { + 'original': 'original.png', + 'original_compressed': 'original_compressed.jpg', + 'denoised': 'denoised.png' +} + +# Initialisiere PaddleOCR +logger.info("Initialisiere PaddleOCR...") +ocr = PaddleOCR( + use_angle_cls=True, + lang='en', + det_db_thresh=0.3, + det_db_box_thresh=0.3, + det_db_unclip_ratio=2.0, + rec_char_type='en', + det_limit_side_len=960, + det_limit_type='max', + use_dilation=True, + det_db_score_mode='fast', + show_log=False # Setze auf False, um die PaddleOCR-Logs zu unterdrücken +) + +def perform_ocr(image_path): + """Führt OCR auf dem gegebenen Bildpfad durch und gibt die Ergebnisse zurück.""" + try: + result = ocr.ocr(image_path, rec=True, cls=True) + if not result: + return {'num_texts': 0, 'avg_confidence': 0.0} + + num_texts = 0 + total_confidence = 0.0 + + for line in result: + for word in line: + text, confidence = word[1] + num_texts += 1 + total_confidence += float(confidence) + + avg_confidence = total_confidence / num_texts if num_texts > 0 else 0.0 + return {'num_texts': num_texts, 'avg_confidence': avg_confidence} + except Exception as e: + logger.error(f"Fehler bei OCR für Bild {image_path}: {e}") + return {'num_texts': 0, 'avg_confidence': 0.0} + +def compare_ocr_results(results): + """ + Vergleicht die OCR-Ergebnisse zwischen den verschiedenen Bildversionen. + Gibt an, welche Version tendenziell bessere Ergebnisse liefert. + """ + comparison = {} + versions = list(IMAGE_FILES.keys()) + + for version in versions: + comparison[version] = { + 'num_texts': results[version]['num_texts'], + 'avg_confidence': results[version]['avg_confidence'] + } + + # Entscheidung basierend auf den Metriken + # Kriterien können angepasst werden + # Hier priorisieren wir höhere avg_confidence und mehr num_texts + best_version = None + best_score = -1 + + for version in versions: + score = comparison[version]['avg_confidence'] + (comparison[version]['num_texts'] / 100) # Gewichtung anpassen + if score > best_score: + best_score = score + best_version = version + + return best_version, comparison + +def parse_arguments(): + """Parst Kommandozeilenargumente.""" + parser = argparse.ArgumentParser(description='Vergleicht OCR-Ergebnisse verschiedener Bildversionen in debug_images-Ordnern.') + parser.add_argument( + 'folders', + nargs='?', + default=None, + help='Durch Kommata getrennte Liste von Ordner-IDs (max. 10), z.B. 20250112_121938_2172d7b3,20250112_122055_ea9e2a72,20250130_182431_2498fcba' + ) + return parser.parse_args() + +def main(): + args = parse_arguments() + + if args.folders: + # Verarbeite die durch Kommata getrennte Liste von Ordner-IDs + folder_ids = [folder.strip() for folder in args.folders.split(',')] + if len(folder_ids) > 10: + logger.warning("Mehr als 10 Ordner-IDs angegeben. Es werden nur die ersten 10 verarbeitet.") + folder_ids = folder_ids[:10] + else: + # Automatisch die ersten 10 Ordner im debug_images-Verzeichnis auswählen + if not os.path.exists(DEBUG_IMAGES_DIR): + logger.error(f"Verzeichnis '{DEBUG_IMAGES_DIR}' existiert nicht.") + sys.exit(1) + + # Sammle alle Unterverzeichnisse und wähle die ersten 10 aus + subdirs = [d for d in os.listdir(DEBUG_IMAGES_DIR) if os.path.isdir(os.path.join(DEBUG_IMAGES_DIR, d))] + folder_ids = subdirs[:10] + logger.info(f"Keine Ordner-IDs angegeben. Es werden die ersten {len(folder_ids)} Ordner verarbeitet.") + + logger.info(f"Starte die OCR-Vergleichsanalyse für {len(folder_ids)} Ordner: {', '.join(folder_ids)}") + + # Liste zum Speichern der Ergebnisse + results_list = [] + + for subdir in tqdm(folder_ids, desc="Verarbeitung der Ordner"): + subdir_path = os.path.join(DEBUG_IMAGES_DIR, subdir) + if not os.path.isdir(subdir_path): + logger.warning(f"Ordner '{subdir}' existiert nicht im '{DEBUG_IMAGES_DIR}' Verzeichnis.") + continue + + ocr_results = {} + for version, filename in IMAGE_FILES.items(): + image_path = os.path.join(subdir_path, filename) + if not os.path.isfile(image_path): + logger.warning(f"Bild '{filename}' fehlt im Ordner '{subdir}'.") + ocr_results[version] = {'num_texts': 0, 'avg_confidence': 0.0} + continue + ocr_result = perform_ocr(image_path) + ocr_results[version] = ocr_result + + best_version, comparison = compare_ocr_results(ocr_results) + + results_list.append({ + 'folder_id': subdir, + 'best_version': best_version, + 'original_num_texts': ocr_results['original']['num_texts'], + 'original_avg_confidence': ocr_results['original']['avg_confidence'], + 'original_compressed_num_texts': ocr_results['original_compressed']['num_texts'], + 'original_compressed_avg_confidence': ocr_results['original_compressed']['avg_confidence'], + 'denoised_num_texts': ocr_results['denoised']['num_texts'], + 'denoised_avg_confidence': ocr_results['denoised']['avg_confidence'] + }) + + if not results_list: + logger.warning("Keine Ergebnisse zum Speichern vorhanden.") + sys.exit(0) + + # Erstelle einen DataFrame und speichere ihn als CSV + output_csv = 'ocr_comparison_results.csv' + df = pd.DataFrame(results_list) + df.to_csv(output_csv, index=False) + logger.info(f"OCR-Vergleichsanalyse abgeschlossen. Ergebnisse gespeichert in '{output_csv}'.") + + # Optional: Statistiken anzeigen + total = len(df) + best_counts = df['best_version'].value_counts() + logger.info("Zusammenfassung der besten Versionen:") + for version, count in best_counts.items(): + percentage = (count / total) * 100 if total > 0 else 0 + logger.info(f"{version}: {count} von {total} ({percentage:.2f}%)") + +if __name__ == "__main__": + main() diff --git a/ocr_server.py b/ocr_server.py index 2a57eb3..1967b06 100644 --- a/ocr_server.py +++ b/ocr_server.py @@ -26,116 +26,84 @@ def get_dir_name(): def create_debug_directory(dir_name): """Erstellt ein eindeutiges Verzeichnis für Debug-Bilder""" - base_dir = 'debug_images' - timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - unique_id = str(uuid.uuid4())[:8] + base_dir = 'images' full_path = os.path.join(base_dir, dir_name) - # Erstelle Hauptverzeichnis falls nicht vorhanden if not os.path.exists(base_dir): os.makedirs(base_dir) - # Erstelle spezifisches Verzeichnis für diesen Durchlauf os.makedirs(full_path) - return full_path def preprocess_image(image, debug_dir): - """ - Verarbeitet das Bild und speichert Zwischenergebnisse im angegebenen Verzeichnis, - einschließlich einer komprimierten JPG-Version und eines Thumbnails. - """ + """Bildverarbeitung mit optionalen Optimierungen""" try: - # Umwandlung in Graustufen + # Graustufenkonvertierung gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) - # Anwendung von CLAHE zur Kontrastverbesserung - clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) + + # Kontrastverbesserung mit CLAHE + clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit enhanced = clahe.apply(gray) - # Rauschunterdrückung - denoised = cv2.fastNlMeansDenoising(enhanced) - # Optional: Binärschwellenwert (auskommentiert) - # _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + # Rauschunterdrückung mit optimierten Parametern + denoised = cv2.fastNlMeansDenoising( + enhanced, + h=15, # Stärkere Rauschreduzierung + templateWindowSize=7, + searchWindowSize=21 + ) - # Speichern der Zwischenergebnisse im spezifischen Verzeichnis - cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray) - cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced) - cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised) - # cv2.imwrite(os.path.join(debug_dir, 'binary.png'), binary) + # Debug-Bilder speichern + # cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray) + # cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced) + # cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised) - # Speichern der komprimierten JPG-Version des Originalbildes - compressed_jpg_path = os.path.join(debug_dir, 'original_compressed.jpg') - original_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) - cv2.imwrite(compressed_jpg_path, original_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 80]) # Qualität auf 80 setzen - logger.info(f"Komprimiertes Original JPG gespeichert: {compressed_jpg_path}") + # Thumbnail als WebP + denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB) + thumbnail = Image.fromarray(denoised_rgb) + thumbnail.thumbnail((256, 256)) + thumbnail_path = os.path.join(debug_dir, 'thumbnail.webp') + thumbnail.save(thumbnail_path, 'WEBP', quality=85) - # Erstellen und Speichern des Thumbnails - thumbnail_path = os.path.join(debug_dir, 'thumbnail.jpg') - image_pil = Image.fromarray(denoised) - image_pil.thumbnail((128, 128)) # Thumbnail-Größe auf 128x128 Pixel setzen - image_pil.save(thumbnail_path, 'JPEG') - logger.info(f"Thumbnail gespeichert: {thumbnail_path}") - - logger.info(f"Debug images saved in: {debug_dir}") return denoised + except Exception as e: logger.error(f"Preprocessing error: {str(e)}") raise - @app.route('/api/ocr', methods=['POST']) def ocr_endpoint(): + debug_dir = None try: - # Erstelle eindeutiges Debug-Verzeichnis für diesen Request + # Verzeichnis erstellen dir_name = get_dir_name() debug_dir = create_debug_directory(dir_name) - logger.info(f"Created debug directory: {debug_dir}") - - if not request.is_json: - return jsonify({'error': 'Content-Type must be application/json'}), 400 - data = request.get_json() - if not data or 'image' not in data: - return jsonify({'error': 'No image provided'}), 400 - - image_b64 = data['image'] - - # Base64 Dekodierung - try: - image_data = base64.b64decode(image_b64) - except Exception as decode_err: - logger.error(f"Base64 decode error: {str(decode_err)}") - return jsonify({'error': 'Base64 decode error'}), 400 - # Bildverarbeitung - try: - image = Image.open(BytesIO(image_data)).convert('RGB') - image = np.array(image) - logger.info(f"Image loaded successfully. Shape: {image.shape}") - - # Originalbild speichern - cv2.imwrite(os.path.join(debug_dir, 'original.png'), - cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) - except Exception as img_err: - logger.error(f"Image processing error: {str(img_err)}") - return jsonify({'error': 'Invalid image data'}), 400 + data = request.get_json() + image_data = base64.b64decode(data['image']) + + # Originalbild als WebP speichern + original_image = Image.open(BytesIO(image_data)).convert('RGB') + webp_path = os.path.join(debug_dir, 'original.webp') + original_image.save(webp_path, 'WEBP', quality=50) - # Bildvorverarbeitung - processed_image = preprocess_image(image, debug_dir) - logger.info("Preprocessing completed") + # WebP-Bild für Verarbeitung laden + with open(webp_path, 'rb') as f: + webp_image = Image.open(BytesIO(f.read())).convert('RGB') + + # Vorverarbeitung + processed_image = preprocess_image(np.array(webp_image), debug_dir) - # PaddleOCR Konfiguration + # OCR mit optimierter Konfiguration ocr = PaddleOCR( use_angle_cls=True, lang='en', - det_db_thresh=0.3, - det_db_box_thresh=0.3, - det_db_unclip_ratio=2.0, - rec_char_type='en', - det_limit_side_len=960, - det_limit_type='max', + det_model_dir='en_PP-OCRv3_det', + rec_model_dir='en_PP-OCRv3_rec', + det_limit_side_len=processed_image.shape[0] * 2, use_dilation=True, - det_db_score_mode='fast', - show_log=True + det_db_score_mode='fast' ) # OCR durchführen @@ -206,59 +174,12 @@ def ocr_endpoint(): }), 500 except Exception as e: - logger.error(f"Unexpected error: {str(e)}") - logger.error(traceback.format_exc()) + logger.error(f"Fehler: {str(e)}") return jsonify({ - 'error': 'Internal server error', - 'debug_dir': debug_dir if 'debug_dir' in locals() else None + 'error': 'Verarbeitungsfehler', + 'details': str(e), + 'debug_dir': dir_name if debug_dir else None }), 500 -@app.route('/api/debug_image//', methods=['GET']) -def get_debug_image(name, filename): - """ - Gibt das angeforderte Bild unter 'debug_images/[name]/[filename]' direkt zurück. - """ - try: - # Sicherheitsmaßnahme: Nur erlaubte Zeichen im Verzeichnisnamen - if not all(c.isalnum() or c in ('_', '-') for c in name): - logger.warning(f"Ungültiger Verzeichnisname angefordert: {name}") - return jsonify({'error': 'Invalid directory name'}), 400 - - # Sicherheitsmaßnahme: Nur erlaubte Zeichen im Dateinamen - if not all(c.isalnum() or c in ('_', '-', '.',) for c in filename): - logger.warning(f"Ungültiger Dateiname angefordert: {filename}") - return jsonify({'error': 'Invalid file name'}), 400 - - # Vollständigen Pfad zum Bild erstellen - image_path = os.path.join('debug_images', name, filename) - - # Überprüfen, ob die Datei existiert - if not os.path.isfile(image_path): - logger.warning(f"Bild nicht gefunden: {image_path}") - return jsonify({'error': 'Image not found'}), 404 - - # Bestimmen des MIME-Typs basierend auf der Dateiendung - mime_type = 'image/png' # Standard-MIME-Typ - if filename.lower().endswith('.jpg') or filename.lower().endswith('.jpeg'): - mime_type = 'image/jpeg' - elif filename.lower().endswith('.gif'): - mime_type = 'image/gif' - elif filename.lower().endswith('.bmp'): - mime_type = 'image/bmp' - elif filename.lower().endswith('.tiff') or filename.lower().endswith('.tif'): - mime_type = 'image/tiff' - - return send_file( - image_path, - mimetype=mime_type, - as_attachment=False - ) - - except Exception as e: - logger.error(f"Fehler beim Abrufen des Bildes '{name}/{filename}': {str(e)}") - logger.error(traceback.format_exc()) - return jsonify({'error': 'Failed to retrieve image'}), 500 - - if __name__ == '__main__': app.run(host='0.0.0.0', port=5000, debug=False) \ No newline at end of file diff --git a/ocr_server_old.py b/ocr_server_old.py new file mode 100644 index 0000000..2a57eb3 --- /dev/null +++ b/ocr_server_old.py @@ -0,0 +1,264 @@ +from flask import Flask, request, jsonify, send_file +from paddleocr import PaddleOCR +import base64 +from PIL import Image +from io import BytesIO +import traceback +import numpy as np +import cv2 +import logging +import os +import uuid +import datetime + +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +app = Flask(__name__) + +def get_dir_name(): + timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + unique_id = str(uuid.uuid4())[:8] + return f"{timestamp}_{unique_id}" + +def create_debug_directory(dir_name): + """Erstellt ein eindeutiges Verzeichnis für Debug-Bilder""" + base_dir = 'debug_images' + timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + unique_id = str(uuid.uuid4())[:8] + full_path = os.path.join(base_dir, dir_name) + + # Erstelle Hauptverzeichnis falls nicht vorhanden + if not os.path.exists(base_dir): + os.makedirs(base_dir) + + # Erstelle spezifisches Verzeichnis für diesen Durchlauf + os.makedirs(full_path) + + return full_path + +def preprocess_image(image, debug_dir): + """ + Verarbeitet das Bild und speichert Zwischenergebnisse im angegebenen Verzeichnis, + einschließlich einer komprimierten JPG-Version und eines Thumbnails. + """ + try: + # Umwandlung in Graustufen + gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + # Anwendung von CLAHE zur Kontrastverbesserung + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) + enhanced = clahe.apply(gray) + # Rauschunterdrückung + denoised = cv2.fastNlMeansDenoising(enhanced) + # Optional: Binärschwellenwert (auskommentiert) + # _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + # Speichern der Zwischenergebnisse im spezifischen Verzeichnis + cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray) + cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced) + cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised) + # cv2.imwrite(os.path.join(debug_dir, 'binary.png'), binary) + + # Speichern der komprimierten JPG-Version des Originalbildes + compressed_jpg_path = os.path.join(debug_dir, 'original_compressed.jpg') + original_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + cv2.imwrite(compressed_jpg_path, original_bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 80]) # Qualität auf 80 setzen + logger.info(f"Komprimiertes Original JPG gespeichert: {compressed_jpg_path}") + + # Erstellen und Speichern des Thumbnails + thumbnail_path = os.path.join(debug_dir, 'thumbnail.jpg') + image_pil = Image.fromarray(denoised) + image_pil.thumbnail((128, 128)) # Thumbnail-Größe auf 128x128 Pixel setzen + image_pil.save(thumbnail_path, 'JPEG') + logger.info(f"Thumbnail gespeichert: {thumbnail_path}") + + logger.info(f"Debug images saved in: {debug_dir}") + return denoised + except Exception as e: + logger.error(f"Preprocessing error: {str(e)}") + raise + + +@app.route('/api/ocr', methods=['POST']) +def ocr_endpoint(): + try: + # Erstelle eindeutiges Debug-Verzeichnis für diesen Request + dir_name = get_dir_name() + debug_dir = create_debug_directory(dir_name) + logger.info(f"Created debug directory: {debug_dir}") + + if not request.is_json: + return jsonify({'error': 'Content-Type must be application/json'}), 400 + + data = request.get_json() + if not data or 'image' not in data: + return jsonify({'error': 'No image provided'}), 400 + + image_b64 = data['image'] + + # Base64 Dekodierung + try: + image_data = base64.b64decode(image_b64) + except Exception as decode_err: + logger.error(f"Base64 decode error: {str(decode_err)}") + return jsonify({'error': 'Base64 decode error'}), 400 + + # Bildverarbeitung + try: + image = Image.open(BytesIO(image_data)).convert('RGB') + image = np.array(image) + logger.info(f"Image loaded successfully. Shape: {image.shape}") + + # Originalbild speichern + cv2.imwrite(os.path.join(debug_dir, 'original.png'), + cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) + except Exception as img_err: + logger.error(f"Image processing error: {str(img_err)}") + return jsonify({'error': 'Invalid image data'}), 400 + + # Bildvorverarbeitung + processed_image = preprocess_image(image, debug_dir) + logger.info("Preprocessing completed") + + # PaddleOCR Konfiguration + ocr = PaddleOCR( + use_angle_cls=True, + lang='en', + det_db_thresh=0.3, + det_db_box_thresh=0.3, + det_db_unclip_ratio=2.0, + rec_char_type='en', + det_limit_side_len=960, + det_limit_type='max', + use_dilation=True, + det_db_score_mode='fast', + show_log=True + ) + + # OCR durchführen + try: + result = ocr.ocr(processed_image, rec=True, cls=True) + + # Debug-Informationen in Datei speichern + with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f: + f.write(f"Raw OCR result:\n{result}\n\n") + + if not result: + logger.warning("No results returned from OCR") + return jsonify({ + 'warning': 'No text detected', + 'debug_dir': debug_dir + }), 200 + + if not result[0]: + logger.warning("Empty results list from OCR") + return jsonify({ + 'warning': 'Empty results list', + 'debug_dir': debug_dir + }), 200 + + # Ergebnisse verarbeiten + extracted_results = [] + for idx, item in enumerate(result[0]): + try: + box = item[0] + text = item[1][0] if item[1] else '' + confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0 + + extracted_results.append({ + 'box': box, + 'text': text, + 'confidence': confidence, + 'name': dir_name + }) + except Exception as proc_err: + logger.error(f"Error processing result {idx}: {str(proc_err)}") + + # Statistiken in Debug-Datei speichern + with open(os.path.join(debug_dir, 'statistics.txt'), 'w') as f: + f.write(f"Total results: {len(extracted_results)}\n") + if extracted_results: + avg_confidence = np.mean([r['confidence'] for r in extracted_results]) + f.write(f"Average confidence: {avg_confidence}\n") + f.write("\nDetailed results:\n") + for idx, result in enumerate(extracted_results): + f.write(f"Result {idx+1}:\n") + f.write(f"Text: {result['text']}\n") + f.write(f"Confidence: {result['confidence']}\n") + f.write(f"Name: {dir_name}\n") + f.write(f"Box coordinates: {result['box']}\n\n") + + return jsonify({ + 'status': 'success', + 'results': extracted_results, + }) + + except Exception as ocr_err: + logger.error(f"OCR processing error: {str(ocr_err)}") + logger.error(traceback.format_exc()) + return jsonify({ + 'error': 'OCR processing failed', + 'details': str(ocr_err), + 'debug_dir': debug_dir + }), 500 + + except Exception as e: + logger.error(f"Unexpected error: {str(e)}") + logger.error(traceback.format_exc()) + return jsonify({ + 'error': 'Internal server error', + 'debug_dir': debug_dir if 'debug_dir' in locals() else None + }), 500 + +@app.route('/api/debug_image//', methods=['GET']) +def get_debug_image(name, filename): + """ + Gibt das angeforderte Bild unter 'debug_images/[name]/[filename]' direkt zurück. + """ + try: + # Sicherheitsmaßnahme: Nur erlaubte Zeichen im Verzeichnisnamen + if not all(c.isalnum() or c in ('_', '-') for c in name): + logger.warning(f"Ungültiger Verzeichnisname angefordert: {name}") + return jsonify({'error': 'Invalid directory name'}), 400 + + # Sicherheitsmaßnahme: Nur erlaubte Zeichen im Dateinamen + if not all(c.isalnum() or c in ('_', '-', '.',) for c in filename): + logger.warning(f"Ungültiger Dateiname angefordert: {filename}") + return jsonify({'error': 'Invalid file name'}), 400 + + # Vollständigen Pfad zum Bild erstellen + image_path = os.path.join('debug_images', name, filename) + + # Überprüfen, ob die Datei existiert + if not os.path.isfile(image_path): + logger.warning(f"Bild nicht gefunden: {image_path}") + return jsonify({'error': 'Image not found'}), 404 + + # Bestimmen des MIME-Typs basierend auf der Dateiendung + mime_type = 'image/png' # Standard-MIME-Typ + if filename.lower().endswith('.jpg') or filename.lower().endswith('.jpeg'): + mime_type = 'image/jpeg' + elif filename.lower().endswith('.gif'): + mime_type = 'image/gif' + elif filename.lower().endswith('.bmp'): + mime_type = 'image/bmp' + elif filename.lower().endswith('.tiff') or filename.lower().endswith('.tif'): + mime_type = 'image/tiff' + + return send_file( + image_path, + mimetype=mime_type, + as_attachment=False + ) + + except Exception as e: + logger.error(f"Fehler beim Abrufen des Bildes '{name}/{filename}': {str(e)}") + logger.error(traceback.format_exc()) + return jsonify({'error': 'Failed to retrieve image'}), 500 + + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000, debug=False) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index fad7532..c8cf7d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ pillow>=10.0.0 numpy>=1.24.4,<2.0.0 opencv-python==4.6.0.66 paddlepaddle==2.6.2 -werkzeug<2.3 \ No newline at end of file +werkzeug<2.3 +pandas>=1.3.0 +tqdm>=4.64.0 \ No newline at end of file