# vocab-backend/ocr_server1.py

from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2  # Import von OpenCV

# Flask application object; the route decorator below registers the OCR endpoint on it.
app = Flask(__name__)
def preprocess_image(image):
    """Prepare an image for OCR and return a binarized single-channel array.

    The pipeline is: grayscale conversion, CLAHE contrast enhancement,
    non-local-means denoising, and finally Otsu binarization.

    Args:
        image: An RGB image, either as a NumPy array or as any object
            ``np.array`` can convert (e.g. a PIL image). A 2-D array is
            treated as already grayscale.

    Returns:
        A 2-D NumPy array in which every pixel is either 0 or 255.
    """
    # OpenCV functions only accept NumPy arrays, while the caller in this
    # file passes a PIL image, so convert anything that is not an ndarray.
    pixels = image if isinstance(image, np.ndarray) else np.array(image)

    # Convert to grayscale (skip when the input has no channel axis).
    if pixels.ndim == 2:
        gray = pixels
    else:
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # Local contrast enhancement.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Noise reduction.
    denoised = cv2.fastNlMeansDenoising(enhanced)

    # Binarization. With THRESH_OTSU the threshold is chosen automatically,
    # so the explicit 127 is not used by OpenCV.
    _, binary = cv2.threshold(
        denoised, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    return binary

@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    The request body must be a JSON object of the form
    ``{"image": "<base64-encoded image bytes>"}``.

    Returns:
        * 200 and a JSON list of ``{"box", "text", "confidence"}`` objects
          (an empty list when no text was recognized).
        * 400 and ``{"error": ...}`` when the request is not JSON, has no
          usable ``image`` field, or the image cannot be decoded.
        * 500 and ``{"error": ...}`` for any other failure; the traceback
          is printed to stderr.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        # silent=True makes malformed JSON yield None instead of raising,
        # so the client gets a 400 below rather than a 500 from the outer
        # handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']
        if not image_b64:
            return jsonify({'error': 'Empty image data'}), 400

        try:
            image_data = base64.b64decode(image_b64)
        except Exception as decode_err:
            return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400

        try:
            pil_image = Image.open(BytesIO(image_data)).convert('RGB')
            # Convert to numpy.ndarray before any OpenCV processing.
            image = np.array(pil_image)
        except Exception as img_err:
            return jsonify({'error': 'Invalid image data', 'details': str(img_err)}), 400

        # Preprocessing runs outside the 400 handler above: a failure here
        # is a server-side problem and should surface as a 500.
        image = np.asarray(preprocess_image(image))

        # Optional: shrink the image so it fits inside max_width x max_height.
        max_width = 1920
        max_height = 1080
        # Only the first two axes are read, so both 2-D (grayscale/binary)
        # and 3-D (color) arrays are accepted.
        height, width = image.shape[:2]
        if width > max_width or height > max_height:
            # One scale factor for both axes keeps the aspect ratio and
            # guarantees that neither bound is exceeded.
            scale = min(max_width / width, max_height / height)
            new_width = max(1, int(width * scale))
            new_height = max(1, int(height * scale))
            image = np.array(Image.fromarray(image).resize((new_width, new_height)))

        # PaddleOCR is intentionally created inside the handler, as in the
        # original implementation.
        # NOTE(review): this is presumably expensive per request; consider a
        # shared instance if that is safe for this deployment.
        ocr = PaddleOCR(use_angle_cls=True, lang='en')

        result = ocr.ocr(image, rec=True, cls=True)

        # The entry for the image can be None/empty when nothing was
        # detected; treat that as "no results" instead of failing.
        detections = result[0] if result else None

        # Extract boxes, texts and confidence values.
        extracted_results = []
        for item in detections or []:
            box = item[0]            # coordinates of the text box
            text = item[1][0]        # recognized text
            confidence = item[1][1]  # confidence score
            extracted_results.append({
                'box': box,
                'text': text,
                # float() keeps the value JSON-serializable even if the
                # library hands back a NumPy scalar.
                'confidence': float(confidence)
            })

        return jsonify(extracted_results)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    import os

    # The server listens on all interfaces, and Flask's debug mode enables
    # the interactive Werkzeug debugger, which allows code execution from
    # the browser. Keep it off unless explicitly requested via FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)