vocab-backend/ocr_server1.py

91 lines
3.0 KiB
Python

from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2 # Import von OpenCV
# Flask application instance; the /ocr route handler is registered on it.
app = Flask(__name__)
def preprocess_image(image):
    """Prepare an image for OCR and return a binarized copy.

    Pipeline: grayscale conversion, local contrast enhancement (CLAHE),
    non-local-means denoising, then Otsu binarization.

    Args:
        image: An RGB image, either a ``PIL.Image.Image`` or a
            ``numpy.ndarray``. A 2-D array is treated as already grayscale.

    Returns:
        A 2-D ``numpy.ndarray`` produced by ``cv2.threshold`` with
        ``THRESH_BINARY`` and a maximum value of 255.
    """
    # OpenCV functions only accept array input; handing cvtColor a PIL image
    # raises, so coerce anything that is not already an ndarray.
    pixels = image if isinstance(image, np.ndarray) else np.array(image)

    # Grayscale conversion (skipped when the input has no channel axis).
    if pixels.ndim == 2:
        gray = pixels
    else:
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # Contrast enhancement.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Noise reduction.
    denoised = cv2.fastNlMeansDenoising(enhanced)

    # Binarization. With THRESH_OTSU the threshold is computed from the image
    # histogram and the explicit threshold argument is ignored, hence 0.
    _, binary = cv2.threshold(
        denoised, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    return binary
def _fit_within(width, height, max_width, max_height):
    """Return (width, height) shrunk to fit both bounds, keeping aspect ratio.

    Sizes that already fit are returned unchanged. Integer arithmetic is used
    so the limiting dimension lands exactly on its bound.
    """
    if width <= max_width and height <= max_height:
        return width, height
    if width * max_height >= height * max_width:
        # Width is the limiting dimension.
        return max_width, max(1, height * max_width // width)
    # Height is the limiting dimension.
    return max(1, width * max_height // height), max_height


@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    Expects a JSON object body with an ``image`` key holding the
    base64-encoded image bytes.

    Returns:
        On success, a JSON list with one object per recognized text line,
        each containing ``box``, ``text`` and ``confidence``; the list is
        empty when no text is found. Invalid requests yield a JSON
        ``{'error': ...}`` object with status 400, unexpected failures one
        with status 500.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        # silent=True makes malformed JSON come back as None instead of
        # raising BadRequest, which the catch-all below would report as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        image_b64 = data['image']
        if not image_b64:
            return jsonify({'error': 'Empty image data'}), 400

        try:
            image_data = base64.b64decode(image_b64)
        except Exception as decode_err:
            return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400

        try:
            # Convert to an ndarray *before* preprocessing: the OpenCV calls
            # in preprocess_image cannot operate on a PIL image.
            rgb = np.array(Image.open(BytesIO(image_data)).convert('RGB'))
            image = np.asarray(preprocess_image(rgb))
        except Exception as img_err:
            return jsonify({'error': 'Invalid image data', 'details': str(img_err)}), 400

        # Downscale oversized images. The preprocessed image is 2-D
        # (single channel), so only the first two shape entries are read.
        max_width = 1920
        max_height = 1080
        height, width = image.shape[:2]
        new_width, new_height = _fit_within(width, height, max_width, max_height)
        if (new_width, new_height) != (width, height):
            image = np.array(Image.fromarray(image).resize((new_width, new_height)))

        # The engine is deliberately constructed inside the handler.
        # NOTE(review): this presumably reloads the models on every request;
        # consider a shared instance if that is safe in this deployment.
        ocr = PaddleOCR(use_angle_cls=True, lang='en')
        result = ocr.ocr(image, rec=True, cls=True)

        # PaddleOCR reports an image without detected text as None rather
        # than an empty list, so guard before iterating.
        lines = (result[0] if result else None) or []

        # Each entry is [box, (text, confidence)].
        extracted_results = [
            {
                'box': item[0],
                'text': item[1][0],
                # Cast so a NumPy scalar cannot break JSON serialization.
                'confidence': float(item[1][1]),
            }
            for item in lines
        ]
        return jsonify(extracted_results)
    except Exception as e:
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    import os

    # The server binds to all interfaces, so the Werkzeug interactive
    # debugger (which permits arbitrary code execution) must not be enabled
    # unconditionally. Opt in explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)