"""Flask service exposing a single PaddleOCR-backed endpoint, ``POST /api/ocr``.

Each request gets its own directory under ``images/`` in which the uploaded
image, a thumbnail of the preprocessed image, and OCR debug output are stored.
"""

import base64
import datetime
import logging
import os
import traceback
import uuid
from io import BytesIO

import cv2
import numpy as np
from flask import Flask, request, jsonify, send_file
from paddleocr import PaddleOCR
from PIL import Image

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

app = Flask(__name__)


def get_dir_name() -> str:
    """Return a directory name of the form ``YYYYmmdd_HHMMSS_<8 hex chars>``.

    The timestamp is the local time; the suffix is the first eight characters
    of a random UUID4 so that requests within the same second do not collide.
    """
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    unique_id = str(uuid.uuid4())[:8]
    return f"{timestamp}_{unique_id}"


def create_debug_directory(dir_name: str) -> str:
    """Create a unique directory for debug images and return its path.

    Args:
        dir_name: Name of the sub-directory to create below ``images``.

    Returns:
        The relative path ``images/<dir_name>``.

    Raises:
        FileExistsError: If ``images/<dir_name>`` already exists.
    """
    base_dir = 'images'
    full_path = os.path.join(base_dir, dir_name)

    # exist_ok avoids the check-then-create race between concurrent requests
    # that both find the base directory missing.
    os.makedirs(base_dir, exist_ok=True)
    # The per-request directory must be new, so an existing one is an error.
    os.makedirs(full_path)
    return full_path


def preprocess_image(image: np.ndarray, debug_dir: str) -> np.ndarray:
    """Prepare an image for OCR and store a thumbnail of the result.

    The image is converted to grayscale, contrast-enhanced with CLAHE and
    denoised with non-local means. A thumbnail (at most 256x256) of the
    denoised image is written to ``<debug_dir>/thumbnail.webp``.

    Args:
        image: Image array in RGB channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``).
        debug_dir: Existing directory that receives the thumbnail.

    Returns:
        The denoised single-channel image.

    Raises:
        Exception: Any error from OpenCV or Pillow is logged and re-raised.
    """
    try:
        # Grayscale conversion
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Contrast enhancement with CLAHE (raised clip limit)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Noise suppression; h=15 gives a stronger noise reduction
        denoised = cv2.fastNlMeansDenoising(
            enhanced,
            h=15,
            templateWindowSize=7,
            searchWindowSize=21
        )

        # Uncomment to dump the intermediate stages while debugging:
        # cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray)
        # cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced)
        # cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised)

        # Thumbnail as WebP
        denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
        thumbnail = Image.fromarray(denoised_rgb)
        thumbnail.thumbnail((256, 256))
        thumbnail_path = os.path.join(debug_dir, 'thumbnail.webp')
        thumbnail.save(thumbnail_path, 'WEBP', quality=85)

        return denoised
    except Exception as e:
        logger.error(f"Preprocessing error: {str(e)}")
        raise


def _extract_results(raw_items, dir_name):
    """Turn raw OCR items into dicts with box, text, confidence and name.

    Each item is read as ``item[0]`` (box) and ``item[1]`` (text, confidence).
    Items that cannot be read are logged and skipped.
    """
    extracted = []
    for idx, item in enumerate(raw_items):
        try:
            box = item[0]
            recognition = item[1]
            text = recognition[0] if recognition else ''
            if recognition and len(recognition) > 1:
                confidence = float(recognition[1])
            else:
                confidence = 0.0
        except Exception as proc_err:
            # Deliberately best-effort: one malformed item must not fail
            # the whole request.
            logger.error(f"Error processing result {idx}: {str(proc_err)}")
            continue
        extracted.append({
            'box': box,
            'text': text,
            'confidence': confidence,
            'name': dir_name
        })
    return extracted


def _write_statistics(debug_dir, dir_name, extracted_results):
    """Write result count, mean confidence and per-result details to statistics.txt."""
    stats_path = os.path.join(debug_dir, 'statistics.txt')
    # Explicit UTF-8: recognised text may contain characters that the
    # platform default encoding cannot represent.
    with open(stats_path, 'w', encoding='utf-8') as f:
        f.write(f"Total results: {len(extracted_results)}\n")
        if extracted_results:
            avg_confidence = np.mean([r['confidence'] for r in extracted_results])
            f.write(f"Average confidence: {avg_confidence}\n")
        f.write("\nDetailed results:\n")
        for idx, entry in enumerate(extracted_results):
            f.write(f"Result {idx+1}:\n")
            f.write(f"Text: {entry['text']}\n")
            f.write(f"Confidence: {entry['confidence']}\n")
            f.write(f"Name: {dir_name}\n")
            f.write(f"Box coordinates: {entry['box']}\n\n")


@app.route('/api/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    Expects a JSON object with an ``image`` key holding the base64-encoded
    image bytes.

    Returns:
        * 200 with ``{'status': 'success', 'results': [...]}`` on success,
          or with a ``warning`` key when OCR returned nothing.
        * 400 when the body is not a JSON object with an ``image`` key, or
          the image cannot be decoded.
        * 500 for any other failure.
    """
    debug_dir = None
    dir_name = None
    try:
        # Create the per-request directory
        dir_name = get_dir_name()
        debug_dir = create_debug_directory(dir_name)

        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so that case is reported as a client error below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            logger.warning("Request without JSON body or 'image' field")
            return jsonify({
                'error': 'Verarbeitungsfehler',
                'details': "Request body must be a JSON object with an 'image' field",
                'debug_dir': dir_name
            }), 400

        try:
            image_data = base64.b64decode(data['image'])
            original_image = Image.open(BytesIO(image_data)).convert('RGB')
        except (ValueError, TypeError, OSError) as input_err:
            # Invalid base64 (binascii.Error is a ValueError), a non-string
            # payload, or bytes Pillow cannot read as an image.
            logger.warning(f"Invalid image payload: {str(input_err)}")
            return jsonify({
                'error': 'Verarbeitungsfehler',
                'details': str(input_err),
                'debug_dir': dir_name
            }), 400

        # Store the original image as WebP
        webp_path = os.path.join(debug_dir, 'original.webp')
        original_image.save(webp_path, 'WEBP', quality=50)

        # Processing runs on the re-loaded WebP file, not on the uploaded bytes
        with open(webp_path, 'rb') as f:
            webp_image = Image.open(BytesIO(f.read())).convert('RGB')

        # Preprocessing
        processed_image = preprocess_image(np.array(webp_image), debug_dir)

        # NOTE(review): a PaddleOCR instance is built per request because
        # det_limit_side_len depends on the image height; reusing instances
        # would need a check that they are safe to share between requests.
        ocr = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            det_model_dir='en_PP-OCRv3_det',
            rec_model_dir='en_PP-OCRv3_rec',
            det_limit_side_len=processed_image.shape[0] * 2,
            use_dilation=True,
            det_db_score_mode='fast'
        )

        # Run OCR
        try:
            result = ocr.ocr(processed_image, rec=True, cls=True)

            # Store the raw result for debugging
            raw_path = os.path.join(debug_dir, 'ocr_results.txt')
            with open(raw_path, 'w', encoding='utf-8') as f:
                f.write(f"Raw OCR result:\n{result}\n\n")

            if not result:
                logger.warning("No results returned from OCR")
                return jsonify({
                    'warning': 'No text detected',
                    'debug_dir': debug_dir
                }), 200

            if not result[0]:
                logger.warning("Empty results list from OCR")
                return jsonify({
                    'warning': 'Empty results list',
                    'debug_dir': debug_dir
                }), 200

            # Process the results and store the statistics
            extracted_results = _extract_results(result[0], dir_name)
            _write_statistics(debug_dir, dir_name, extracted_results)

            return jsonify({
                'status': 'success',
                'results': extracted_results,
            })

        except Exception as ocr_err:
            logger.error(f"OCR processing error: {str(ocr_err)}")
            logger.error(traceback.format_exc())
            return jsonify({
                'error': 'OCR processing failed',
                'details': str(ocr_err),
                'debug_dir': debug_dir
            }), 500

    except Exception as e:
        logger.error(f"Fehler: {str(e)}")
        # Log the traceback as well, as the OCR error handler does.
        logger.error(traceback.format_exc())
        return jsonify({
            'error': 'Verarbeitungsfehler',
            'details': str(e),
            'debug_dir': dir_name if debug_dir else None
        }), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=False)