vocab-backend/ocr_server3.py

233 lines
8.5 KiB
Python

import base64
import datetime
import logging
import os
import threading
import traceback
import uuid
from io import BytesIO

from flask import Flask, request, jsonify, send_file
from paddleocr import PaddleOCR
from PIL import Image
import numpy as np
import cv2

from deck_endpoints import deck_bp  # import the blueprint
# Root logger: DEBUG and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Flask application; the blueprint imported from deck_endpoints is attached here.
app = Flask(__name__)
app.register_blueprint(deck_bp)
def get_dir_name():
    """Return a fresh, timestamp-based name with a random suffix.

    Returns:
        A string of the form ``YYYYmmdd_HHMMSS_xxxxxxxx``: the current local
        time followed by the first eight hex digits of a random UUID4.
    """
    now = datetime.datetime.now()
    random_suffix = uuid.uuid4().hex[:8]
    return "_".join((now.strftime('%Y%m%d_%H%M%S'), random_suffix))
def create_debug_directory(dir_name):
    """Create the debug-image directory ``debug_images/<dir_name>``.

    Args:
        dir_name: Name of the run-specific subdirectory to create below
            ``debug_images``.

    Returns:
        The path ``debug_images/<dir_name>`` (relative to the current working
        directory).

    Raises:
        FileExistsError: If the run-specific directory already exists.
    """
    base_dir = 'debug_images'
    full_path = os.path.join(base_dir, dir_name)

    # exist_ok replaces the former exists()-then-makedirs() sequence, which
    # could fail when two callers tried to create the base directory at once.
    os.makedirs(base_dir, exist_ok=True)

    # Deliberately strict: reusing an existing run directory would mix the
    # artifacts of two different runs.
    os.makedirs(full_path)
    return full_path
def preprocess_image(image, debug_dir):
    """Prepare an image for OCR and dump every intermediate stage to disk.

    The image is converted from RGB to grayscale, contrast-enhanced with
    CLAHE and then denoised. Each stage is written to ``debug_dir`` as
    ``gray.png``, ``enhanced.png`` and ``denoised.png``. An Otsu binarization
    step (and its ``binary.png`` dump) is currently disabled.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_RGB2GRAY``, so it
            is treated as RGB.
        debug_dir: Directory that receives the intermediate images.

    Returns:
        The denoised grayscale image.

    Raises:
        Exception: Any error from the processing steps is logged and re-raised.
    """
    try:
        grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        contrast_boosted = equalizer.apply(grayscale)
        cleaned = cv2.fastNlMeansDenoising(contrast_boosted)

        # Persist the stages in processing order.
        stages = (
            ('gray.png', grayscale),
            ('enhanced.png', contrast_boosted),
            ('denoised.png', cleaned),
        )
        for file_name, stage_image in stages:
            cv2.imwrite(os.path.join(debug_dir, file_name), stage_image)

        logger.info(f"Debug images saved in: {debug_dir}")
        return cleaned
    except Exception as e:
        logger.error(f"Preprocessing error: {str(e)}")
        raise
# A single PaddleOCR engine is shared by all requests instead of building a
# new one per request, so the engine set-up cost is paid only once.
_ocr_engine = None
# Guards the lazy creation of the engine and serialises inference on the
# shared instance in case requests are handled on several threads.
_ocr_lock = threading.Lock()


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use."""
    global _ocr_engine
    with _ocr_lock:
        if _ocr_engine is None:
            _ocr_engine = PaddleOCR(
                use_angle_cls=True,
                lang='en',
                det_db_thresh=0.3,
                det_db_box_thresh=0.3,
                det_db_unclip_ratio=2.0,
                rec_char_type='en',
                det_limit_side_len=960,
                det_limit_type='max',
                use_dilation=True,
                det_db_score_mode='fast',
                show_log=True
            )
        return _ocr_engine


def _extract_results(ocr_items, dir_name):
    """Turn raw OCR items into the result dicts returned to the client."""
    extracted = []
    for idx, item in enumerate(ocr_items):
        try:
            box = item[0]
            text = item[1][0] if item[1] else ''
            confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
            extracted.append({
                'box': box,
                'text': text,
                'confidence': confidence,
                'name': dir_name
            })
        except Exception as proc_err:
            # Best effort: a malformed item is logged and skipped so the
            # remaining results are still delivered.
            logger.error(f"Error processing result {idx}: {str(proc_err)}")
    return extracted


def _write_statistics(debug_dir, dir_name, extracted_results):
    """Write a summary of the extracted results to ``statistics.txt``."""
    stats_path = os.path.join(debug_dir, 'statistics.txt')
    # Explicit UTF-8 so recognised text never fails on the platform default.
    with open(stats_path, 'w', encoding='utf-8') as f:
        f.write(f"Total results: {len(extracted_results)}\n")
        if extracted_results:
            avg_confidence = np.mean([r['confidence'] for r in extracted_results])
            f.write(f"Average confidence: {avg_confidence}\n")
        f.write("\nDetailed results:\n")
        for idx, entry in enumerate(extracted_results):
            f.write(f"Result {idx+1}:\n")
            f.write(f"Text: {entry['text']}\n")
            f.write(f"Confidence: {entry['confidence']}\n")
            f.write(f"Name: {dir_name}\n")
            f.write(f"Box coordinates: {entry['box']}\n\n")


@app.route('/api/ocr', methods=['POST'])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    The request body must be a JSON object with an ``image`` key holding the
    base64-encoded image. The decoded image, the preprocessing stages, the raw
    OCR output and a statistics file are stored in a per-request directory
    below ``debug_images``.

    Returns:
        * 200 ``{'status': 'success', 'results': [...]}`` where every result
          has the keys ``box``, ``text``, ``confidence`` and ``name`` (the
          name of the request's debug directory).
        * 200 ``{'warning': ..., 'debug_dir': ...}`` when OCR returns nothing.
        * 400 ``{'error': ...}`` for a non-JSON request, a missing image,
          undecodable base64 or undecodable image data.
        * 500 ``{'error': ..., ...}`` when OCR fails or another error occurs.
    """
    # Stays None until the directory exists, so the generic error handler can
    # always report it.
    debug_dir = None
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type must be application/json'}), 400

        # silent=True: a malformed JSON body yields None and is answered with
        # 400 below instead of raising into the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'image' not in data:
            return jsonify({'error': 'No image provided'}), 400

        # Base64 decoding
        try:
            image_data = base64.b64decode(data['image'])
        except Exception as decode_err:
            logger.error(f"Base64 decode error: {str(decode_err)}")
            return jsonify({'error': 'Base64 decode error'}), 400

        # Image decoding
        try:
            image = np.array(Image.open(BytesIO(image_data)).convert('RGB'))
            logger.info(f"Image loaded successfully. Shape: {image.shape}")
        except Exception as img_err:
            logger.error(f"Image processing error: {str(img_err)}")
            return jsonify({'error': 'Invalid image data'}), 400

        # The debug directory is created only once the request is known to
        # carry a usable image, so rejected requests leave nothing on disk.
        dir_name = get_dir_name()
        debug_dir = create_debug_directory(dir_name)
        logger.info(f"Created debug directory: {debug_dir}")

        # Store the original image (OpenCV writes BGR)
        cv2.imwrite(os.path.join(debug_dir, 'original.png'),
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        # Preprocessing
        processed_image = preprocess_image(image, debug_dir)
        logger.info("Preprocessing completed")

        ocr = _get_ocr_engine()

        # Run OCR
        try:
            with _ocr_lock:
                result = ocr.ocr(processed_image, rec=True, cls=True)

            # Keep the raw OCR output for debugging
            with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w',
                      encoding='utf-8') as f:
                f.write(f"Raw OCR result:\n{result}\n\n")

            if not result:
                logger.warning("No results returned from OCR")
                return jsonify({
                    'warning': 'No text detected',
                    'debug_dir': debug_dir
                }), 200
            if not result[0]:
                logger.warning("Empty results list from OCR")
                return jsonify({
                    'warning': 'Empty results list',
                    'debug_dir': debug_dir
                }), 200

            extracted_results = _extract_results(result[0], dir_name)
            _write_statistics(debug_dir, dir_name, extracted_results)

            return jsonify({
                'status': 'success',
                'results': extracted_results,
            })
        except Exception as ocr_err:
            logger.error(f"OCR processing error: {str(ocr_err)}")
            logger.error(traceback.format_exc())
            return jsonify({
                'error': 'OCR processing failed',
                'details': str(ocr_err),
                'debug_dir': debug_dir
            }), 500
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            'error': 'Internal server error',
            'debug_dir': debug_dir
        }), 500
@app.route('/api/debug_image/<name>', methods=['GET'])
def get_debug_image(name):
    """Serve ``debug_images/<name>/original.png`` directly as ``image/png``.

    Args:
        name: Name of the debug directory; only alphanumeric characters,
            ``_`` and ``-`` are accepted.

    Returns:
        The PNG file on success, otherwise a JSON error with status 400
        (invalid name), 404 (no such image) or 500 (unexpected failure).
    """
    try:
        # Safety measure: only allowed characters in the name, so it cannot
        # contain path separators or '..'.
        if not all(c.isalnum() or c in ('_', '-') for c in name):
            logger.warning(f"Ungültiger Bildname angefordert: {name}")
            return jsonify({'error': 'Invalid image name'}), 400

        # Resolve against the working directory once and use the absolute path
        # for both the existence check and send_file. Flask resolves relative
        # send_file paths against the application root, which need not be the
        # working directory the debug images were written under.
        image_path = os.path.abspath(os.path.join('debug_images', name, 'original.png'))
        if not os.path.isfile(image_path):
            logger.warning(f"Bild nicht gefunden: {image_path}")
            return jsonify({'error': 'Image not found'}), 404

        return send_file(
            image_path,
            mimetype='image/png',
            as_attachment=False
        )
    except Exception as e:
        logger.error(f"Fehler beim Abrufen des Bildes '{name}': {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({'error': 'Failed to retrieve image'}), 500
if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger (which
    # lets a client execute code) must not be enabled by default. Opt in for
    # local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)