This commit is contained in:
Andreas Knuth 2025-01-31 19:02:11 +01:00
parent 297e2b9489
commit 8b347d3a4e
2 changed files with 103 additions and 18 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@ __pycache__
database.db database.db
debug_images debug_images
images images
restored-images
en_PP-OCRv3* en_PP-OCRv3*
*.db *.db
*.lock *.lock

View File

@ -10,6 +10,8 @@ import logging
import os import os
import uuid import uuid
import datetime import datetime
import shutil
from functools import wraps
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG, level=logging.DEBUG,
@ -28,10 +30,10 @@ def create_debug_directory(dir_name):
"""Erstellt ein eindeutiges Verzeichnis für Debug-Bilder""" """Erstellt ein eindeutiges Verzeichnis für Debug-Bilder"""
base_dir = 'images' base_dir = 'images'
full_path = os.path.join(base_dir, dir_name) full_path = os.path.join(base_dir, dir_name)
if not os.path.exists(base_dir): if not os.path.exists(base_dir):
os.makedirs(base_dir) os.makedirs(base_dir)
os.makedirs(full_path) os.makedirs(full_path)
return full_path return full_path
@ -40,11 +42,11 @@ def preprocess_image(image, debug_dir):
try: try:
# Graustufenkonvertierung # Graustufenkonvertierung
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Kontrastverbesserung mit CLAHE # Kontrastverbesserung mit CLAHE
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit
enhanced = clahe.apply(gray) enhanced = clahe.apply(gray)
# Rauschunterdrückung mit optimierten Parametern # Rauschunterdrückung mit optimierten Parametern
denoised = cv2.fastNlMeansDenoising( denoised = cv2.fastNlMeansDenoising(
enhanced, enhanced,
@ -53,11 +55,6 @@ def preprocess_image(image, debug_dir):
searchWindowSize=21 searchWindowSize=21
) )
# Debug-Bilder speichern
# cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray)
# cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced)
# cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised)
# Thumbnail als WebP # Thumbnail als WebP
denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB) denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
thumbnail = Image.fromarray(denoised_rgb) thumbnail = Image.fromarray(denoised_rgb)
@ -71,6 +68,17 @@ def preprocess_image(image, debug_dir):
logger.error(f"Preprocessing error: {str(e)}") logger.error(f"Preprocessing error: {str(e)}")
raise raise
def require_localhost(f):
    """Decorator that only lets requests from a loopback address reach ``f``.

    The client address is read from ``request.remote_addr``. Requests from
    any other address (or with no/unparsable address) are logged and
    answered with a JSON error body and HTTP status 403; ``f`` is not called.

    Args:
        f: The view function to protect.

    Returns:
        The wrapped view function (metadata preserved via ``functools.wraps``).
    """
    # Function-scope import keeps this block self-contained.
    import ipaddress

    def _is_loopback(addr):
        """Return True when ``addr`` is a textual loopback IP address."""
        if not addr:
            return False
        try:
            ip = ipaddress.ip_address(addr)
        except ValueError:
            # Not a plain IP literal (e.g. a socket path or scoped address).
            return False
        # Dual-stack sockets report IPv4 peers as '::ffff:a.b.c.d'; unwrap the
        # embedded IPv4 address so '::ffff:127.0.0.1' is recognised as well.
        mapped = getattr(ip, 'ipv4_mapped', None)
        if mapped is not None:
            ip = mapped
        return ip.is_loopback

    @wraps(f)
    def decorated_function(*args, **kwargs):
        # NOTE(review): if this app is ever served behind a local reverse
        # proxy, remote_addr will presumably be the proxy's loopback address
        # and this check would admit every request -- confirm the deployment.
        if not _is_loopback(request.remote_addr):
            logger.warning(f"Nicht autorisierte Anfrage von {request.remote_addr}")
            return jsonify({
                'error': 'Zugriff verweigert. Nur localhost ist erlaubt.'
            }), 403
        return f(*args, **kwargs)

    return decorated_function
@app.route('/api/ocr', methods=['POST']) @app.route('/api/ocr', methods=['POST'])
def ocr_endpoint(): def ocr_endpoint():
debug_dir = None debug_dir = None
@ -78,11 +86,11 @@ def ocr_endpoint():
# Verzeichnis erstellen # Verzeichnis erstellen
dir_name = get_dir_name() dir_name = get_dir_name()
debug_dir = create_debug_directory(dir_name) debug_dir = create_debug_directory(dir_name)
# Bildverarbeitung # Bildverarbeitung
data = request.get_json() data = request.get_json()
image_data = base64.b64decode(data['image']) image_data = base64.b64decode(data['image'])
# Originalbild als WebP speichern # Originalbild als WebP speichern
original_image = Image.open(BytesIO(image_data)).convert('RGB') original_image = Image.open(BytesIO(image_data)).convert('RGB')
webp_path = os.path.join(debug_dir, 'original.webp') webp_path = os.path.join(debug_dir, 'original.webp')
@ -91,7 +99,7 @@ def ocr_endpoint():
# WebP-Bild für Verarbeitung laden # WebP-Bild für Verarbeitung laden
with open(webp_path, 'rb') as f: with open(webp_path, 'rb') as f:
webp_image = Image.open(BytesIO(f.read())).convert('RGB') webp_image = Image.open(BytesIO(f.read())).convert('RGB')
# Vorverarbeitung # Vorverarbeitung
processed_image = preprocess_image(np.array(webp_image), debug_dir) processed_image = preprocess_image(np.array(webp_image), debug_dir)
@ -109,18 +117,18 @@ def ocr_endpoint():
# OCR durchführen # OCR durchführen
try: try:
result = ocr.ocr(processed_image, rec=True, cls=True) result = ocr.ocr(processed_image, rec=True, cls=True)
# Debug-Informationen in Datei speichern # Debug-Informationen in Datei speichern
with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f: with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f:
f.write(f"Raw OCR result:\n{result}\n\n") f.write(f"Raw OCR result:\n{result}\n\n")
if not result: if not result:
logger.warning("No results returned from OCR") logger.warning("No results returned from OCR")
return jsonify({ return jsonify({
'warning': 'No text detected', 'warning': 'No text detected',
'debug_dir': debug_dir 'debug_dir': debug_dir
}), 200 }), 200
if not result[0]: if not result[0]:
logger.warning("Empty results list from OCR") logger.warning("Empty results list from OCR")
return jsonify({ return jsonify({
@ -135,7 +143,7 @@ def ocr_endpoint():
box = item[0] box = item[0]
text = item[1][0] if item[1] else '' text = item[1][0] if item[1] else ''
confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0 confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
extracted_results.append({ extracted_results.append({
'box': box, 'box': box,
'text': text, 'text': text,
@ -168,7 +176,7 @@ def ocr_endpoint():
logger.error(f"OCR processing error: {str(ocr_err)}") logger.error(f"OCR processing error: {str(ocr_err)}")
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
return jsonify({ return jsonify({
'error': 'OCR processing failed', 'error': 'OCR processing failed',
'details': str(ocr_err), 'details': str(ocr_err),
'debug_dir': debug_dir 'debug_dir': debug_dir
}), 500 }), 500
@ -181,5 +189,81 @@ def ocr_endpoint():
'debug_dir': dir_name if debug_dir else None 'debug_dir': dir_name if debug_dir else None
}), 500 }), 500
@app.route('/api/cleanup', methods=['POST'])
@require_localhost
def cleanup_endpoint():
    """Remove every sub-directory of ``images`` that is not listed in ``usedIds``.

    Expects a JSON object in the request body with two optional keys:

    * ``usedIds`` (list, default ``[]``): names of directories to keep.
    * ``dryrun`` (bool, default ``True``): when true, nothing is deleted and
      the directories that would be removed are only reported.

    Returns:
        A ``(response, status)`` tuple:

        * 400 when the body is not a JSON object or a key has the wrong type.
        * 200 with ``dirsToDelete`` for a dry run.
        * 200 with ``deletedIds`` (and ``failedDeletions`` if any directory
          could not be removed) for a real run, or when ``images`` is absent.
        * 500 for any unexpected error.
    """
    try:
        # silent=True returns None instead of raising on a missing, malformed
        # or wrongly-typed body, so bad input is answered with 400 below
        # rather than ending up in the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            logger.error("Invalid data format: request body should be a JSON object")
            return jsonify({
                'error': 'Der Request-Body muss ein JSON-Objekt sein.'
            }), 400

        used_ids = data.get('usedIds', [])
        dryrun = data.get('dryrun', True)  # Default: True (safe mode)

        # Validate the input data
        if not isinstance(used_ids, list):
            logger.error("Invalid data format: 'usedIds' should be a list")
            return jsonify({
                'error': "'usedIds' muss eine Liste von IDs sein."
            }), 400

        if not isinstance(dryrun, bool):
            logger.error("Invalid data format: 'dryrun' should be a boolean")
            return jsonify({
                'error': "'dryrun' muss ein boolescher Wert sein (true oder false)."
            }), 400

        base_dir = 'images'
        if not os.path.exists(base_dir):
            logger.info(f"Das Basisverzeichnis '{base_dir}' existiert nicht. Nichts zu bereinigen.")
            return jsonify({
                'status': 'success',
                'message': f"Das Basisverzeichnis '{base_dir}' existiert nicht.",
                'deletedIds': [],
                'dryrun': dryrun
            }), 200

        # Directory names are always strings, so non-string entries can never
        # match one; dropping them gives O(1) lookups without risking an
        # unhashable element and without changing which directories are kept.
        keep = {entry for entry in used_ids if isinstance(entry, str)}

        all_dirs = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
        dirs_to_delete = [d for d in all_dirs if d not in keep]

        if dryrun:
            logger.info("Dry-Run aktiviert. Keine Verzeichnisse werden gelöscht.")
            return jsonify({
                'status': 'dryrun',
                'message': 'Die folgenden Verzeichnisse würden gelöscht werden.',
                'dirsToDelete': dirs_to_delete,
                'dryrun': dryrun
            }), 200

        deleted_ids = []
        failed_deletions = []
        for dir_name in dirs_to_delete:
            dir_path = os.path.join(base_dir, dir_name)
            try:
                shutil.rmtree(dir_path)
            except Exception as delete_err:
                # Best effort: record the failure and continue with the rest.
                logger.error(f"Fehler beim Löschen des Verzeichnisses '{dir_name}': {str(delete_err)}")
                failed_deletions.append({
                    'dir': dir_name,
                    'error': str(delete_err)
                })
            else:
                deleted_ids.append(dir_name)
                logger.info(f"Verzeichnis '{dir_name}' erfolgreich gelöscht.")

        response = {
            'status': 'success',
            'deletedIds': deleted_ids,
            'dryrun': dryrun
        }
        # Only present when at least one directory could not be removed.
        if failed_deletions:
            response['failedDeletions'] = failed_deletions

        return jsonify(response), 200

    except Exception as e:
        logger.error(f"Cleanup-Fehler: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            'error': 'Bereinigungsfehler',
            'details': str(e)
        }), 500
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False) app.run(host='0.0.0.0', port=5000, debug=False)