from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2   # OpenCV for image preprocessing
import os    # For saving debug images
import time  # For timestamped filenames

app = Flask(__name__)

# Initialize PaddleOCR once, outside the request handler, to improve performance
ocr = PaddleOCR(use_angle_cls=True, lang='en')

@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        data = request.get_json()
        if not data or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']
        if not image_b64:
            return jsonify({'error': 'Empty image data'}), 400

        try:
            image_data = base64.b64decode(image_b64)
        except Exception as decode_err:
            return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400

        try:
            image = Image.open(BytesIO(image_data)).convert('RGB')
            image_np = np.array(image)  # Convert to numpy.ndarray
        except Exception as img_err:
            return jsonify({'error': 'Invalid image data'}), 400

        # Preprocessing: keep only the dark (black) areas of the image
        # Convert the image to grayscale
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

        # Apply a threshold so that only dark regions remain in the mask
        threshold_value = 150  # Adjust this value as needed
        _, mask = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)

        # Optional: morphological operations to clean up the mask
        kernel = np.ones((3, 3), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
        mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=1)

        # Apply the mask to the original image
        filtered_image_np = cv2.bitwise_and(image_np, image_np, mask=mask)

        # Convert the filtered image back to a PIL Image
        filtered_image = Image.fromarray(filtered_image_np)

        # Optional: resize the image if it exceeds the maximum dimensions
        max_width = 1920
        max_height = 1080
        height, width, _ = filtered_image_np.shape
        if width > max_width or height > max_height:
            aspect_ratio = width / height
            if aspect_ratio > 1:
                new_width = max_width
                new_height = int(max_width / aspect_ratio)
            else:
                new_height = max_height
                new_width = int(max_height * aspect_ratio)
            filtered_image = filtered_image.resize((new_width, new_height))
            filtered_image_np = np.array(filtered_image)

        # Save the preprocessed image for inspection
        output_dir = 'processed_images'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Generate a unique filename based on the current timestamp
        timestamp = int(time.time() * 1000)
        processed_image_path = os.path.join(output_dir, f'processed_{timestamp}.png')
        filtered_image.save(processed_image_path)
        print(f'Processed image saved at: {processed_image_path}')

        # Save the mask for inspection
        mask_image = Image.fromarray(mask)
        mask_image_path = os.path.join(output_dir, f'mask_{timestamp}.png')
        mask_image.save(mask_image_path)
        print(f'Mask image saved at: {mask_image_path}')

        # Run OCR on the filtered image
        result = ocr.ocr(filtered_image_np, rec=True, cls=True)

        # Extract texts and confidence scores.
        # ocr.ocr() returns one list per page; each entry is [box, (text, confidence)],
        # so we iterate over the page lists rather than the top-level result directly.
        extracted_results = []
        for page in result:
            if not page:
                continue
            for item in page:
                box = item[0]            # The coordinates of the text box
                text = item[1][0]        # The recognized text
                confidence = item[1][1]  # The confidence score
                extracted_results.append({
                    'box': box,
                    'text': text,
                    'confidence': confidence
                })

        return jsonify(extracted_results)

    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True, threaded=False)  # Single-threaded
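
# --- Example client (illustrative sketch, not part of the server above) ---
# Shows one way to call the /ocr endpoint: read an image, base64-encode it, and
# POST it as JSON. The URL, port, and the filename 'sample.png' are assumptions
# made for this example; adjust them to your setup.
#
#   import base64
#   import requests
#
#   with open('sample.png', 'rb') as f:
#       image_b64 = base64.b64encode(f.read()).decode('ascii')
#
#   resp = requests.post('http://localhost:5000/ocr', json={'image': image_b64})
#   print(resp.status_code)
#   print(resp.json())  # List of {'box': ..., 'text': ..., 'confidence': ...}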