diff --git a/.gitignore b/.gitignore index b1b6fb0..e5abc4e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__ database.db debug_images images +restored-images en_PP-OCRv3* *.db *.lock \ No newline at end of file diff --git a/ocr_server.py b/ocr_server.py index 1967b06..6d10c1b 100644 --- a/ocr_server.py +++ b/ocr_server.py @@ -10,6 +10,8 @@ import logging import os import uuid import datetime +import shutil +from functools import wraps logging.basicConfig( level=logging.DEBUG, @@ -28,10 +30,10 @@ def create_debug_directory(dir_name): """Erstellt ein eindeutiges Verzeichnis für Debug-Bilder""" base_dir = 'images' full_path = os.path.join(base_dir, dir_name) - + if not os.path.exists(base_dir): os.makedirs(base_dir) - + os.makedirs(full_path) return full_path @@ -40,11 +42,11 @@ def preprocess_image(image, debug_dir): try: # Graustufenkonvertierung gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) - + # Kontrastverbesserung mit CLAHE clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit enhanced = clahe.apply(gray) - + # Rauschunterdrückung mit optimierten Parametern denoised = cv2.fastNlMeansDenoising( enhanced, @@ -53,11 +55,6 @@ def preprocess_image(image, debug_dir): searchWindowSize=21 ) - # Debug-Bilder speichern - # cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray) - # cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced) - # cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised) - # Thumbnail als WebP denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB) thumbnail = Image.fromarray(denoised_rgb) @@ -71,6 +68,17 @@ def preprocess_image(image, debug_dir): logger.error(f"Preprocessing error: {str(e)}") raise +def require_localhost(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if request.remote_addr not in ('127.0.0.1', '::1'): + logger.warning(f"Nicht autorisierte Anfrage von {request.remote_addr}") + return jsonify({ + 'error': 'Zugriff verweigert. Nur localhost ist erlaubt.' + }), 403 + return f(*args, **kwargs) + return decorated_function + @app.route('/api/ocr', methods=['POST']) def ocr_endpoint(): debug_dir = None @@ -78,11 +86,11 @@ def ocr_endpoint(): # Verzeichnis erstellen dir_name = get_dir_name() debug_dir = create_debug_directory(dir_name) - + # Bildverarbeitung data = request.get_json() image_data = base64.b64decode(data['image']) - + # Originalbild als WebP speichern original_image = Image.open(BytesIO(image_data)).convert('RGB') webp_path = os.path.join(debug_dir, 'original.webp') @@ -91,7 +99,7 @@ def ocr_endpoint(): # WebP-Bild für Verarbeitung laden with open(webp_path, 'rb') as f: webp_image = Image.open(BytesIO(f.read())).convert('RGB') - + # Vorverarbeitung processed_image = preprocess_image(np.array(webp_image), debug_dir) @@ -109,18 +117,18 @@ def ocr_endpoint(): # OCR durchführen try: result = ocr.ocr(processed_image, rec=True, cls=True) - + # Debug-Informationen in Datei speichern with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f: f.write(f"Raw OCR result:\n{result}\n\n") - + if not result: logger.warning("No results returned from OCR") return jsonify({ 'warning': 'No text detected', 'debug_dir': debug_dir }), 200 - + if not result[0]: logger.warning("Empty results list from OCR") return jsonify({ @@ -135,7 +143,7 @@ def ocr_endpoint(): box = item[0] text = item[1][0] if item[1] else '' confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0 - + extracted_results.append({ 'box': box, 'text': text, @@ -168,7 +176,7 @@ def ocr_endpoint(): logger.error(f"OCR processing error: {str(ocr_err)}") logger.error(traceback.format_exc()) return jsonify({ - 'error': 'OCR processing failed', + 'error': 'OCR processing failed', 'details': str(ocr_err), 'debug_dir': debug_dir }), 500 @@ -181,5 +189,81 @@ def ocr_endpoint(): 'debug_dir': dir_name if debug_dir else None }), 500 +@app.route('/api/cleanup', methods=['POST']) +@require_localhost +def cleanup_endpoint(): + try: + data = request.get_json() + used_ids = data.get('usedIds', []) + dryrun = data.get('dryrun', True) # Standardwert: True + + # Validierung der Eingabedaten + if not isinstance(used_ids, list): + logger.error("Invalid data format: 'usedIds' should be a list") + return jsonify({ + 'error': "'usedIds' muss eine Liste von IDs sein." + }), 400 + + if not isinstance(dryrun, bool): + logger.error("Invalid data format: 'dryrun' should be a boolean") + return jsonify({ + 'error': "'dryrun' muss ein boolescher Wert sein (true oder false)." + }), 400 + + base_dir = 'images' + if not os.path.exists(base_dir): + logger.info(f"Das Basisverzeichnis '{base_dir}' existiert nicht. Nichts zu bereinigen.") + return jsonify({ + 'status': 'success', + 'message': f"Das Basisverzeichnis '{base_dir}' existiert nicht.", + 'deletedIds': [], + 'dryrun': dryrun + }), 200 + + all_dirs = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))] + dirs_to_delete = [d for d in all_dirs if d not in used_ids] + + if dryrun: + logger.info("Dry-Run aktiviert. Keine Verzeichnisse werden gelöscht.") + return jsonify({ + 'status': 'dryrun', + 'message': 'Die folgenden Verzeichnisse würden gelöscht werden.', + 'dirsToDelete': dirs_to_delete, + 'dryrun': dryrun + }), 200 + else: + deleted_ids = [] + failed_deletions = [] + for dir_name in dirs_to_delete: + dir_path = os.path.join(base_dir, dir_name) + try: + shutil.rmtree(dir_path) + deleted_ids.append(dir_name) + logger.info(f"Verzeichnis '{dir_name}' erfolgreich gelöscht.") + except Exception as delete_err: + logger.error(f"Fehler beim Löschen des Verzeichnisses '{dir_name}': {str(delete_err)}") + failed_deletions.append({ + 'dir': dir_name, + 'error': str(delete_err) + }) + + response = { + 'status': 'success', + 'deletedIds': deleted_ids, + 'dryrun': dryrun + } + if failed_deletions: + response['failedDeletions'] = failed_deletions + + return jsonify(response), 200 + + except Exception as e: + logger.error(f"Cleanup-Fehler: {str(e)}") + logger.error(traceback.format_exc()) + return jsonify({ + 'error': 'Bereinigungsfehler', + 'details': str(e) + }), 500 + if __name__ == '__main__': - app.run(host='0.0.0.0', port=5000, debug=False) \ No newline at end of file + app.run(host='0.0.0.0', port=5000, debug=False)