#20 cleanup
This commit is contained in:
parent
297e2b9489
commit
8b347d3a4e
|
|
@ -3,6 +3,7 @@ __pycache__
|
||||||
database.db
|
database.db
|
||||||
debug_images
|
debug_images
|
||||||
images
|
images
|
||||||
|
restored-images
|
||||||
en_PP-OCRv3*
|
en_PP-OCRv3*
|
||||||
*.db
|
*.db
|
||||||
*.lock
|
*.lock
|
||||||
120
ocr_server.py
120
ocr_server.py
|
|
@ -10,6 +10,8 @@ import logging
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
import datetime
|
import datetime
|
||||||
|
import shutil
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.DEBUG,
|
level=logging.DEBUG,
|
||||||
|
|
@ -28,10 +30,10 @@ def create_debug_directory(dir_name):
|
||||||
"""Erstellt ein eindeutiges Verzeichnis für Debug-Bilder"""
|
"""Erstellt ein eindeutiges Verzeichnis für Debug-Bilder"""
|
||||||
base_dir = 'images'
|
base_dir = 'images'
|
||||||
full_path = os.path.join(base_dir, dir_name)
|
full_path = os.path.join(base_dir, dir_name)
|
||||||
|
|
||||||
if not os.path.exists(base_dir):
|
if not os.path.exists(base_dir):
|
||||||
os.makedirs(base_dir)
|
os.makedirs(base_dir)
|
||||||
|
|
||||||
os.makedirs(full_path)
|
os.makedirs(full_path)
|
||||||
return full_path
|
return full_path
|
||||||
|
|
||||||
|
|
@ -40,11 +42,11 @@ def preprocess_image(image, debug_dir):
|
||||||
try:
|
try:
|
||||||
# Graustufenkonvertierung
|
# Graustufenkonvertierung
|
||||||
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
||||||
|
|
||||||
# Kontrastverbesserung mit CLAHE
|
# Kontrastverbesserung mit CLAHE
|
||||||
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit
|
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8)) # Erhöhter Clip-Limit
|
||||||
enhanced = clahe.apply(gray)
|
enhanced = clahe.apply(gray)
|
||||||
|
|
||||||
# Rauschunterdrückung mit optimierten Parametern
|
# Rauschunterdrückung mit optimierten Parametern
|
||||||
denoised = cv2.fastNlMeansDenoising(
|
denoised = cv2.fastNlMeansDenoising(
|
||||||
enhanced,
|
enhanced,
|
||||||
|
|
@ -53,11 +55,6 @@ def preprocess_image(image, debug_dir):
|
||||||
searchWindowSize=21
|
searchWindowSize=21
|
||||||
)
|
)
|
||||||
|
|
||||||
# Debug-Bilder speichern
|
|
||||||
# cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray)
|
|
||||||
# cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced)
|
|
||||||
# cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised)
|
|
||||||
|
|
||||||
# Thumbnail als WebP
|
# Thumbnail als WebP
|
||||||
denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
|
denoised_rgb = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
|
||||||
thumbnail = Image.fromarray(denoised_rgb)
|
thumbnail = Image.fromarray(denoised_rgb)
|
||||||
|
|
@ -71,6 +68,17 @@ def preprocess_image(image, debug_dir):
|
||||||
logger.error(f"Preprocessing error: {str(e)}")
|
logger.error(f"Preprocessing error: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def _is_loopback_address(addr):
    """Return True when *addr* is the textual form of a loopback IP address.

    Accepts the whole 127.0.0.0/8 range, ``::1`` and IPv4-mapped IPv6
    addresses such as ``::ffff:127.0.0.1``. Missing or unparsable values
    are treated as non-loopback.
    """
    # Local import so this block does not depend on the module's import list.
    import ipaddress

    if not addr:
        return False
    try:
        ip = ipaddress.ip_address(addr)
    except ValueError:
        return False
    # A dual-stack listener reports IPv4 peers as ::ffff:a.b.c.d; unwrap the
    # embedded IPv4 address so the loopback test is applied to it directly.
    mapped = getattr(ip, 'ipv4_mapped', None)
    if mapped is not None:
        ip = mapped
    return ip.is_loopback


def require_localhost(f):
    """Decorator that rejects requests whose peer is not a loopback address.

    The wrapped view is only called when ``request.remote_addr`` is a
    loopback address; otherwise a warning is logged and a JSON error with
    HTTP status 403 is returned.

    NOTE(review): ``remote_addr`` is the address of the direct peer. If the
    app is ever served behind a reverse proxy on the same host, every request
    would appear to be local -- confirm the deployment before relying on this.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not _is_loopback_address(request.remote_addr):
            logger.warning(f"Nicht autorisierte Anfrage von {request.remote_addr}")
            return jsonify({
                'error': 'Zugriff verweigert. Nur localhost ist erlaubt.'
            }), 403
        return f(*args, **kwargs)
    return decorated_function
|
||||||
|
|
||||||
@app.route('/api/ocr', methods=['POST'])
|
@app.route('/api/ocr', methods=['POST'])
|
||||||
def ocr_endpoint():
|
def ocr_endpoint():
|
||||||
debug_dir = None
|
debug_dir = None
|
||||||
|
|
@ -78,11 +86,11 @@ def ocr_endpoint():
|
||||||
# Verzeichnis erstellen
|
# Verzeichnis erstellen
|
||||||
dir_name = get_dir_name()
|
dir_name = get_dir_name()
|
||||||
debug_dir = create_debug_directory(dir_name)
|
debug_dir = create_debug_directory(dir_name)
|
||||||
|
|
||||||
# Bildverarbeitung
|
# Bildverarbeitung
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
image_data = base64.b64decode(data['image'])
|
image_data = base64.b64decode(data['image'])
|
||||||
|
|
||||||
# Originalbild als WebP speichern
|
# Originalbild als WebP speichern
|
||||||
original_image = Image.open(BytesIO(image_data)).convert('RGB')
|
original_image = Image.open(BytesIO(image_data)).convert('RGB')
|
||||||
webp_path = os.path.join(debug_dir, 'original.webp')
|
webp_path = os.path.join(debug_dir, 'original.webp')
|
||||||
|
|
@ -91,7 +99,7 @@ def ocr_endpoint():
|
||||||
# WebP-Bild für Verarbeitung laden
|
# WebP-Bild für Verarbeitung laden
|
||||||
with open(webp_path, 'rb') as f:
|
with open(webp_path, 'rb') as f:
|
||||||
webp_image = Image.open(BytesIO(f.read())).convert('RGB')
|
webp_image = Image.open(BytesIO(f.read())).convert('RGB')
|
||||||
|
|
||||||
# Vorverarbeitung
|
# Vorverarbeitung
|
||||||
processed_image = preprocess_image(np.array(webp_image), debug_dir)
|
processed_image = preprocess_image(np.array(webp_image), debug_dir)
|
||||||
|
|
||||||
|
|
@ -109,18 +117,18 @@ def ocr_endpoint():
|
||||||
# OCR durchführen
|
# OCR durchführen
|
||||||
try:
|
try:
|
||||||
result = ocr.ocr(processed_image, rec=True, cls=True)
|
result = ocr.ocr(processed_image, rec=True, cls=True)
|
||||||
|
|
||||||
# Debug-Informationen in Datei speichern
|
# Debug-Informationen in Datei speichern
|
||||||
with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f:
|
with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f:
|
||||||
f.write(f"Raw OCR result:\n{result}\n\n")
|
f.write(f"Raw OCR result:\n{result}\n\n")
|
||||||
|
|
||||||
if not result:
|
if not result:
|
||||||
logger.warning("No results returned from OCR")
|
logger.warning("No results returned from OCR")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'warning': 'No text detected',
|
'warning': 'No text detected',
|
||||||
'debug_dir': debug_dir
|
'debug_dir': debug_dir
|
||||||
}), 200
|
}), 200
|
||||||
|
|
||||||
if not result[0]:
|
if not result[0]:
|
||||||
logger.warning("Empty results list from OCR")
|
logger.warning("Empty results list from OCR")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
|
|
@ -135,7 +143,7 @@ def ocr_endpoint():
|
||||||
box = item[0]
|
box = item[0]
|
||||||
text = item[1][0] if item[1] else ''
|
text = item[1][0] if item[1] else ''
|
||||||
confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
|
confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
|
||||||
|
|
||||||
extracted_results.append({
|
extracted_results.append({
|
||||||
'box': box,
|
'box': box,
|
||||||
'text': text,
|
'text': text,
|
||||||
|
|
@ -168,7 +176,7 @@ def ocr_endpoint():
|
||||||
logger.error(f"OCR processing error: {str(ocr_err)}")
|
logger.error(f"OCR processing error: {str(ocr_err)}")
|
||||||
logger.error(traceback.format_exc())
|
logger.error(traceback.format_exc())
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'error': 'OCR processing failed',
|
'error': 'OCR processing failed',
|
||||||
'details': str(ocr_err),
|
'details': str(ocr_err),
|
||||||
'debug_dir': debug_dir
|
'debug_dir': debug_dir
|
||||||
}), 500
|
}), 500
|
||||||
|
|
@ -181,5 +189,81 @@ def ocr_endpoint():
|
||||||
'debug_dir': dir_name if debug_dir else None
|
'debug_dir': dir_name if debug_dir else None
|
||||||
}), 500
|
}), 500
|
||||||
|
|
||||||
|
@app.route('/api/cleanup', methods=['POST'])
@require_localhost
def cleanup_endpoint():
    """Delete sub-directories of ``images`` that are not listed as in use.

    Expects a JSON object in the request body with:
      * ``usedIds`` -- list of directory names to keep (default: ``[]``).
      * ``dryrun``  -- boolean; when true (the default) nothing is deleted
        and the candidates are only reported.

    Responses (all JSON):
      * 400 -- the body is not a JSON object, ``usedIds`` is not a list or
        ``dryrun`` is not a boolean.
      * 200 -- dry-run report (``dirsToDelete``) or deletion result
        (``deletedIds`` plus ``failedDeletions`` when some removals failed).
      * 500 -- any unexpected error; the message is returned in ``details``.
    """
    try:
        # silent=True yields None for a missing/malformed body instead of
        # raising, so bad input is answered with 400 rather than falling into
        # the generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            logger.error("Invalid data format: request body should be a JSON object")
            return jsonify({
                'error': "Der Anfragekörper muss ein JSON-Objekt sein."
            }), 400

        used_ids = data.get('usedIds', [])
        dryrun = data.get('dryrun', True)  # default: True (report only)

        # Validate the input data
        if not isinstance(used_ids, list):
            logger.error("Invalid data format: 'usedIds' should be a list")
            return jsonify({
                'error': "'usedIds' muss eine Liste von IDs sein."
            }), 400

        if not isinstance(dryrun, bool):
            logger.error("Invalid data format: 'dryrun' should be a boolean")
            return jsonify({
                'error': "'dryrun' muss ein boolescher Wert sein (true oder false)."
            }), 400

        base_dir = 'images'
        if not os.path.exists(base_dir):
            logger.info(f"Das Basisverzeichnis '{base_dir}' existiert nicht. Nichts zu bereinigen.")
            return jsonify({
                'status': 'success',
                'message': f"Das Basisverzeichnis '{base_dir}' existiert nicht.",
                'deletedIds': [],
                'dryrun': dryrun
            }), 200

        # Directory names are strings, so compare against the string form of
        # each ID: a numeric ID sent by the client must still protect its
        # directory. A set keeps the membership test O(1) per directory.
        ids_to_keep = {str(item) for item in used_ids}

        all_dirs = [
            d for d in os.listdir(base_dir)
            if os.path.isdir(os.path.join(base_dir, d))
        ]
        dirs_to_delete = [d for d in all_dirs if d not in ids_to_keep]

        if dryrun:
            logger.info("Dry-Run aktiviert. Keine Verzeichnisse werden gelöscht.")
            return jsonify({
                'status': 'dryrun',
                'message': 'Die folgenden Verzeichnisse würden gelöscht werden.',
                'dirsToDelete': dirs_to_delete,
                'dryrun': dryrun
            }), 200

        deleted_ids = []
        failed_deletions = []
        for dir_name in dirs_to_delete:
            dir_path = os.path.join(base_dir, dir_name)
            # Best effort: one directory failing must not abort the others.
            try:
                shutil.rmtree(dir_path)
                deleted_ids.append(dir_name)
                logger.info(f"Verzeichnis '{dir_name}' erfolgreich gelöscht.")
            except Exception as delete_err:
                logger.error(f"Fehler beim Löschen des Verzeichnisses '{dir_name}': {str(delete_err)}")
                failed_deletions.append({
                    'dir': dir_name,
                    'error': str(delete_err)
                })

        response = {
            'status': 'success',
            'deletedIds': deleted_ids,
            'dryrun': dryrun
        }
        # Only report failures when there are any.
        if failed_deletions:
            response['failedDeletions'] = failed_deletions

        return jsonify(response), 200

    except Exception as e:
        logger.error(f"Cleanup-Fehler: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            'error': 'Bereinigungsfehler',
            'details': str(e)
        }), 500
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(host='0.0.0.0', port=5000, debug=False)
|
app.run(host='0.0.0.0', port=5000, debug=False)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue