vocab-backend/ocr_server2.py

115 lines
4.5 KiB
Python

from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2 # Import von OpenCV
import os # Import für das Speichern von Dateien
import time # Import für Zeitstempel
# Flask application object that the route below is registered on.
app = Flask(__name__)

# Build the PaddleOCR engine a single time at import, outside the request
# handler, so requests reuse it instead of re-initialising it (performance).
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
)
def _fit_within(width, height, max_width=1920, max_height=1080):
    """Return ``(new_width, new_height)`` scaled down to fit the given box.

    The aspect ratio is preserved. Sizes that already fit are returned
    unchanged. The scale factor is the smaller of the two per-axis ratios,
    so neither limit can be exceeded.
    """
    if width <= max_width and height <= max_height:
        return width, height
    scale = min(max_width / width, max_height / height)
    # max(1, ...) guards against a zero-sized dimension for extreme ratios.
    return max(1, int(width * scale)), max(1, int(height * scale))


def _is_ocr_line(entry):
    """Return True if *entry* looks like ``[box, (text, confidence)]``."""
    return (
        isinstance(entry, (list, tuple))
        and len(entry) == 2
        and isinstance(entry[1], (list, tuple))
        and len(entry[1]) == 2
        and isinstance(entry[1][0], str)
    )


def _extract_ocr_lines(result):
    """Flatten a PaddleOCR result into JSON-serialisable dicts.

    Accepts both result layouts: a flat list of ``[box, (text, confidence)]``
    lines, and a list holding one such list per image, where an image without
    any detected text may be represented by ``None``. Which layout is
    produced depends on the installed PaddleOCR version.
    """
    extracted = []
    for entry in result or []:
        if entry is None:
            # No text was detected for this image.
            continue
        if _is_ocr_line(entry):
            candidates = [entry]
        elif isinstance(entry, (list, tuple)):
            candidates = entry
        else:
            continue
        for line in candidates:
            if not _is_ocr_line(line):
                continue
            box, (text, confidence) = line
            extracted.append({
                # Convert to plain Python types so jsonify never has to
                # serialise numpy arrays or numpy scalars.
                'box': np.asarray(box).tolist(),
                'text': text,
                'confidence': float(confidence),
            })
    return extracted


@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    Expects a JSON object with an ``image`` key holding the base64 image
    data. The image is reduced to its dark regions, optionally downscaled,
    saved (together with its mask) under ``processed_images`` for
    inspection, and passed to PaddleOCR.

    Returns:
        A JSON list of ``{'box', 'text', 'confidence'}`` objects on success,
        a 400 response with an ``error`` field for invalid input, or a 500
        response with an ``error`` field for unexpected failures.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        # silent=True makes a malformed body yield None instead of raising,
        # so it is reported as a client error rather than as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']
        if not image_b64:
            return jsonify({'error': 'Empty image data'}), 400

        try:
            image_data = base64.b64decode(image_b64)
        except (ValueError, TypeError) as decode_err:
            return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400

        try:
            image = Image.open(BytesIO(image_data)).convert('RGB')
            image_np = np.array(image)
        except Exception:
            return jsonify({'error': 'Invalid image data'}), 400

        # Preprocessing: keep only the dark (black) areas of the image.
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
        threshold_value = 150  # Tune as needed.
        # THRESH_BINARY_INV: pixels at or below the threshold become 255.
        _, mask = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)

        # Morphological opening followed by dilation to clean up the mask.
        kernel = np.ones((3, 3), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
        mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=1)

        # Apply the mask to the original image; unmasked pixels become 0.
        filtered_image_np = cv2.bitwise_and(image_np, image_np, mask=mask)
        filtered_image = Image.fromarray(filtered_image_np)

        # Downscale so the image fits inside 1920x1080, keeping aspect ratio.
        height, width, _ = filtered_image_np.shape
        new_width, new_height = _fit_within(width, height, 1920, 1080)
        if (new_width, new_height) != (width, height):
            filtered_image = filtered_image.resize((new_width, new_height))
            filtered_image_np = np.array(filtered_image)

        # Save the preprocessed image and the mask for inspection.
        output_dir = 'processed_images'
        os.makedirs(output_dir, exist_ok=True)

        # File names are based on the current time in milliseconds.
        timestamp = int(time.time() * 1000)
        processed_image_path = os.path.join(output_dir, f'processed_{timestamp}.png')
        filtered_image.save(processed_image_path)
        print(f'Processed image saved at: {processed_image_path}')

        # Note: the mask is saved at the pre-resize resolution.
        mask_image = Image.fromarray(mask)
        mask_image_path = os.path.join(output_dir, f'mask_{timestamp}.png')
        mask_image.save(mask_image_path)
        print(f'Mask image saved at: {mask_image_path}')

        # Run OCR on the filtered image and collect texts and confidences.
        result = ocr.ocr(filtered_image_np, rec=True, cls=True)
        return jsonify(_extract_ocr_lines(result))
    except Exception as e:
        # Top-level boundary: log the traceback and report a 500.
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # The server binds to all interfaces (0.0.0.0). With debug mode on, the
    # Werkzeug interactive debugger would be reachable by anyone who can
    # connect to the port, so debug is off unless FLASK_DEBUG requests it.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    # threaded=False: the development server runs single-threaded.
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled, threaded=False)