Andreas Knuth 2024-11-19 20:11:58 +01:00
commit 4d7a52ee99
9 changed files with 670 additions and 0 deletions

4
.gitignore vendored Normal file

@@ -0,0 +1,4 @@
.env
__pycache__
database.db
debug_images

162
deck_endpoints.py Normal file

@@ -0,0 +1,162 @@
# deck_endpoints.py
from flask import Blueprint, request, jsonify
import sqlite3
import os
deck_bp = Blueprint('deck_bp', __name__)
DATABASE = 'mydatabase.db'
def get_db_connection():
conn = sqlite3.connect(DATABASE)
conn.row_factory = sqlite3.Row
return conn
# Create the tables if they do not exist yet
def init_db():
conn = get_db_connection()
cursor = conn.cursor()
# Create the Deck table
cursor.execute('''
CREATE TABLE IF NOT EXISTS Deck (
id INTEGER PRIMARY KEY AUTOINCREMENT,
deckname TEXT UNIQUE NOT NULL
)
''')
# Create the Image table
cursor.execute('''
CREATE TABLE IF NOT EXISTS Image (
id INTEGER PRIMARY KEY AUTOINCREMENT,
deckid INTEGER,
bildname TEXT,
iconindex INTEGER,
x1 REAL,
x2 REAL,
y1 REAL,
y2 REAL,
FOREIGN KEY(deckid) REFERENCES Deck(id)
)
''')
conn.commit()
conn.close()
@deck_bp.route('/api/decks', methods=['POST'])
def create_deck():
data = request.get_json()
if not data or 'deckname' not in data:
return jsonify({'error': 'No deckname provided'}), 400
deckname = data['deckname']
conn = get_db_connection()
cursor = conn.cursor()
try:
cursor.execute('INSERT INTO Deck (deckname) VALUES (?)', (deckname,))
conn.commit()
deck_id = cursor.lastrowid
conn.close()
return jsonify({'status': 'success', 'deck_id': deck_id}), 201
except sqlite3.IntegrityError:
conn.close()
return jsonify({'error': 'Deckname already exists'}), 400
@deck_bp.route('/api/decks', methods=['GET'])
def get_decks():
conn = get_db_connection()
cursor = conn.cursor()
decks = cursor.execute('SELECT * FROM Deck').fetchall()
deck_list = []
for deck in decks:
deck_id = deck['id']
deck_name = deck['deckname']
# Fetch all images for this deck
images = cursor.execute('''
SELECT
bildname AS name,
iconindex,
x1,
x2,
y1,
y2
FROM Image
WHERE deckid = ?
''', (deck_id,)).fetchall()
images_list = [dict(image) for image in images]
# Add the deck with its name and associated images
deck_dict = {
'name': deck_name,
'images': images_list
}
deck_list.append(deck_dict)
conn.close()
return jsonify(deck_list)
@deck_bp.route('/api/decks/<deckname>', methods=['DELETE'])
def delete_deck(deckname):
conn = get_db_connection()
cursor = conn.cursor()
# First delete the images that belong to this deck
cursor.execute('SELECT id FROM Deck WHERE deckname = ?', (deckname,))
deck = cursor.fetchone()
if deck:
deck_id = deck['id']
cursor.execute('DELETE FROM Image WHERE deckid = ?', (deck_id,))
# Then delete the deck itself
cursor.execute('DELETE FROM Deck WHERE id = ?', (deck_id,))
conn.commit()
conn.close()
return jsonify({'status': 'success'}), 200
else:
conn.close()
return jsonify({'error': 'Deck not found'}), 404
@deck_bp.route('/image', methods=['PUT'])
def update_image():
data = request.get_json()
if not data:
return jsonify({'error': 'No data provided'}), 400
required_fields = ['deckid', 'bildname', 'iconindex', 'x1', 'x2', 'y1', 'y2']
if not all(field in data for field in required_fields):
return jsonify({'error': 'Missing fields in data'}), 400
deckid = data['deckid']
bildname = data['bildname']
iconindex = data['iconindex']
x1 = data['x1']
x2 = data['x2']
y1 = data['y1']
y2 = data['y2']
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('''
INSERT INTO Image (deckid, bildname, iconindex, x1, x2, y1, y2)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', (deckid, bildname, iconindex, x1, x2, y1, y2))
conn.commit()
image_id = cursor.lastrowid
conn.close()
return jsonify({'status': 'success', 'image_id': image_id}), 201
@deck_bp.route('/image/<bildname>', methods=['GET'])
def get_images_by_bildname(bildname):
conn = get_db_connection()
images = conn.execute('SELECT * FROM Image WHERE bildname = ?', (bildname,)).fetchall()
conn.close()
image_list = [dict(image) for image in images]
return jsonify(image_list)
@deck_bp.route('/image/<bildname>/<int:iconindex>', methods=['GET'])
def get_image_by_bildname_and_index(bildname, iconindex):
conn = get_db_connection()
image = conn.execute('SELECT * FROM Image WHERE bildname = ? AND iconindex = ?', (bildname, iconindex)).fetchone()
conn.close()
if image is None:
return jsonify({'error': 'Image not found'}), 404
else:
return jsonify(dict(image)), 200
# Make sure the database exists
if not os.path.exists(DATABASE):
init_db()
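For reference, a minimal client sketch (not part of the commit) exercising these endpoints; it assumes the blueprint is served at http://localhost:5000, as in ocr_server3.py below, and that the requests package is installed:

# deck_api_example.py - hypothetical client sketch, not part of the commit
import requests

BASE = 'http://localhost:5000'  # assumption: deck_bp registered on the OCR server

# Create a deck; returns 201 with deck_id, or 400 if the name already exists
resp = requests.post(f'{BASE}/api/decks', json={'deckname': 'demo'})
print(resp.status_code, resp.json())
deck_id = resp.json().get('deck_id')

# Store an image region for the deck (note: this PUT route inserts a new row)
payload = {'deckid': deck_id, 'bildname': 'screenshot', 'iconindex': 0,
           'x1': 0.1, 'x2': 0.4, 'y1': 0.2, 'y2': 0.5}
print(requests.put(f'{BASE}/image', json=payload).json())

# List all decks with their images, then delete the demo deck again
print(requests.get(f'{BASE}/api/decks').json())
print(requests.delete(f'{BASE}/api/decks/demo').status_code)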

BIN
mydatabase.db Normal file

Binary file not shown.

60
ocr_server.py Normal file

@@ -0,0 +1,60 @@
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np  # import numpy
app = Flask(__name__)
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # adjust the language as needed
@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
try:
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
data = request.get_json()
if not data or 'image' not in data:
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
if not image_b64:
return jsonify({'error': 'Empty image data'}), 400
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400
try:
image = Image.open(BytesIO(image_data)).convert('RGB')
image = np.array(image)  # convert to numpy.ndarray
except Exception as img_err:
return jsonify({'error': 'Invalid image data', 'details': str(img_err)}), 400
# Optional: resize the image if necessary
# PaddleOCR can handle large images, but capping the size keeps processing efficient
max_width = 1920
max_height = 1080
height, width, _ = image.shape
if width > max_width or height > max_height:
aspect_ratio = width / height
if aspect_ratio > 1:
new_width = max_width
new_height = int(max_width / aspect_ratio)
else:
new_height = max_height
new_width = int(max_height * aspect_ratio)
image = np.array(Image.fromarray(image).resize((new_width, new_height)))
result = ocr.ocr(image, rec=True, cls=True)
return jsonify(result)
except Exception as e:
traceback.print_exc()
return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
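A minimal sketch of how a client could call this endpoint, assuming the server runs locally on port 5000 and some_image.png is a sample file (both names are assumptions):

# ocr_client_example.py - hypothetical client sketch
import base64
import requests

with open('some_image.png', 'rb') as f:  # assumed sample file
    image_b64 = base64.b64encode(f.read()).decode('ascii')

resp = requests.post('http://localhost:5000/ocr', json={'image': image_b64})
print(resp.status_code)
print(resp.json())  # raw PaddleOCR output: nested lists of boxes, texts and confidences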

90
ocr_server1.py Normal file

@@ -0,0 +1,90 @@
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2  # import OpenCV
app = Flask(__name__)
def preprocess_image(image):
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Contrast enhancement (CLAHE)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
enhanced = clahe.apply(gray)
# Noise reduction
denoised = cv2.fastNlMeansDenoising(enhanced)
# Binarization (Otsu)
_, binary = cv2.threshold(denoised, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
return binary
@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
try:
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
data = request.get_json()
if not data or 'image' not in data:
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
if not image_b64:
return jsonify({'error': 'Empty image data'}), 400
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400
try:
image = Image.open(BytesIO(image_data)).convert('RGB')
image = np.array(image)  # convert to numpy.ndarray before OpenCV preprocessing
image = preprocess_image(image)
except Exception as img_err:
return jsonify({'error': 'Invalid image data', 'details': str(img_err)}), 400
# Optional: resize the image if necessary
max_width = 1920
max_height = 1080
height, width = image.shape[:2]  # preprocess_image returns a single-channel image
if width > max_width or height > max_height:
aspect_ratio = width / height
if aspect_ratio > 1:
new_width = max_width
new_height = int(max_width / aspect_ratio)
else:
new_height = max_height
new_width = int(max_height * aspect_ratio)
image = np.array(Image.fromarray(image).resize((new_width, new_height)))
# Initialize PaddleOCR inside the handler
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # re-created on every request
result = ocr.ocr(image, rec=True, cls=True)
# Extract the texts and confidence values
extracted_results = []
for item in result[0]:
box = item[0]  # the coordinates of the text box
text = item[1][0]  # the recognized text
confidence = item[1][1]  # the confidence value
extracted_results.append({
'box': box,
'text': text,
'confidence': confidence
})
return jsonify(extracted_results)
except Exception as e:
traceback.print_exc()
return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
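The CLAHE/denoise/Otsu steps above can be checked in isolation; a small debugging sketch, assuming sample.png is a local test image (the filename is an assumption):

# preprocess_check.py - hypothetical sketch mirroring preprocess_image()
import cv2
import numpy as np
from PIL import Image

img = np.array(Image.open('sample.png').convert('RGB'))  # assumed sample file

gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)               # grayscale
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)                               # contrast enhancement
denoised = cv2.fastNlMeansDenoising(enhanced)              # noise reduction
_, binary = cv2.threshold(denoised, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

cv2.imwrite('binary_preview.png', binary)  # inspect what PaddleOCR would receive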

114
ocr_server2.py Normal file

@@ -0,0 +1,114 @@
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2  # import OpenCV
import os  # import for saving files
import time  # import for timestamps
app = Flask(__name__)
# Initialize PaddleOCR once outside the request handler to improve performance
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # initialized outside the handler
@app.route('/ocr', methods=['POST'])
def ocr_endpoint():
try:
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
data = request.get_json()
if not data or 'image' not in data:
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
if not image_b64:
return jsonify({'error': 'Empty image data'}), 400
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
return jsonify({'error': 'Base64 decode error', 'details': str(decode_err)}), 400
try:
image = Image.open(BytesIO(image_data)).convert('RGB')
image_np = np.array(image)  # convert to numpy.ndarray
except Exception as img_err:
return jsonify({'error': 'Invalid image data'}), 400
# Preprocessing: keep only the dark (black) regions of the image
# Convert the image to grayscale
gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
# Apply a threshold so that only the dark regions are kept
threshold_value = 150  # adjust this value as needed
_, mask = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)
# Optional: morphological operations to clean up the mask
kernel = np.ones((3,3), np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=1)
# Apply the mask to the original image
filtered_image_np = cv2.bitwise_and(image_np, image_np, mask=mask)
# Convert the filtered image back to a PIL Image
filtered_image = Image.fromarray(filtered_image_np)
# Optional: resize the image if necessary
max_width = 1920
max_height = 1080
height, width, _ = filtered_image_np.shape
if width > max_width or height > max_height:
aspect_ratio = width / height
if aspect_ratio > 1:
new_width = max_width
new_height = int(max_width / aspect_ratio)
else:
new_height = max_height
new_width = int(max_height * aspect_ratio)
filtered_image = filtered_image.resize((new_width, new_height))
filtered_image_np = np.array(filtered_image)
# Save the preprocessed image for inspection
output_dir = 'processed_images'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Generate a unique filename based on the current timestamp
timestamp = int(time.time() * 1000)
processed_image_path = os.path.join(output_dir, f'processed_{timestamp}.png')
filtered_image.save(processed_image_path)
print(f'Processed image saved at: {processed_image_path}')
# Save the mask for inspection
mask_image = Image.fromarray(mask)
mask_image_path = os.path.join(output_dir, f'mask_{timestamp}.png')
mask_image.save(mask_image_path)
print(f'Mask image saved at: {mask_image_path}')
# Run OCR on the filtered image
result = ocr.ocr(filtered_image_np, rec=True, cls=True)
# Extract the texts and confidence values
extracted_results = []
for item in result[0]:  # ocr.ocr returns one result list per input image
box = item[0]  # the coordinates of the text box
text = item[1][0]  # the recognized text
confidence = item[1][1]  # the confidence value
extracted_results.append({
'box': box,
'text': text,
'confidence': confidence
})
return jsonify(extracted_results)
except Exception as e:
traceback.print_exc()
return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True, threaded=False) # Single-Threaded
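Since threshold_value is meant to be tuned, a small sketch (not part of the commit) for previewing the dark-region mask at a few candidate thresholds; the input filename and the candidate values are assumptions:

# mask_preview.py - hypothetical tuning sketch for the dark-region mask
import cv2
import numpy as np
from PIL import Image

image_np = np.array(Image.open('sample.png').convert('RGB'))  # assumed sample file
gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
kernel = np.ones((3, 3), np.uint8)

for threshold_value in (100, 150, 200):  # candidate thresholds
    _, mask = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY_INV)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel, iterations=1)
    cv2.imwrite(f'mask_{threshold_value}.png', mask)  # pick the value that keeps the text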

206
ocr_server3.py Normal file

@@ -0,0 +1,206 @@
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
from PIL import Image
from io import BytesIO
import traceback
import numpy as np
import cv2
import logging
import os
import uuid
import datetime
from deck_endpoints import deck_bp  # import the blueprint
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
app = Flask(__name__)
app.register_blueprint(deck_bp)  # register the blueprint
def get_dir_name():
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
unique_id = str(uuid.uuid4())[:8]
return f"{timestamp}_{unique_id}"
def create_debug_directory(dir_name):
"""Erstellt ein eindeutiges Verzeichnis für Debug-Bilder"""
base_dir = 'debug_images'
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
unique_id = str(uuid.uuid4())[:8]
full_path = os.path.join(base_dir, dir_name)
# Erstelle Hauptverzeichnis falls nicht vorhanden
if not os.path.exists(base_dir):
os.makedirs(base_dir)
# Erstelle spezifisches Verzeichnis für diesen Durchlauf
os.makedirs(full_path)
return full_path
def preprocess_image(image, debug_dir):
"""
Verarbeitet das Bild und speichert Zwischenergebnisse im angegebenen Verzeichnis
"""
try:
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
enhanced = clahe.apply(gray)
denoised = cv2.fastNlMeansDenoising(enhanced)
_, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Save the intermediate results in the run-specific directory
cv2.imwrite(os.path.join(debug_dir, 'gray.png'), gray)
cv2.imwrite(os.path.join(debug_dir, 'enhanced.png'), enhanced)
cv2.imwrite(os.path.join(debug_dir, 'denoised.png'), denoised)
cv2.imwrite(os.path.join(debug_dir, 'binary.png'), binary)
logger.info(f"Debug images saved in: {debug_dir}")
return binary
except Exception as e:
logger.error(f"Preprocessing error: {str(e)}")
raise
@app.route('/api/ocr', methods=['POST'])
def ocr_endpoint():
try:
# Create a unique debug directory for this request
dir_name = get_dir_name()
debug_dir = create_debug_directory(dir_name)
logger.info(f"Created debug directory: {debug_dir}")
if not request.is_json:
return jsonify({'error': 'Content-Type must be application/json'}), 400
data = request.get_json()
if not data or 'image' not in data:
return jsonify({'error': 'No image provided'}), 400
image_b64 = data['image']
# Base64 decoding
try:
image_data = base64.b64decode(image_b64)
except Exception as decode_err:
logger.error(f"Base64 decode error: {str(decode_err)}")
return jsonify({'error': 'Base64 decode error'}), 400
# Load the image
try:
image = Image.open(BytesIO(image_data)).convert('RGB')
image = np.array(image)
logger.info(f"Image loaded successfully. Shape: {image.shape}")
# Save the original image
cv2.imwrite(os.path.join(debug_dir, 'original.png'),
cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
except Exception as img_err:
logger.error(f"Image processing error: {str(img_err)}")
return jsonify({'error': 'Invalid image data'}), 400
# Image preprocessing
processed_image = preprocess_image(image, debug_dir)
logger.info("Preprocessing completed")
# PaddleOCR configuration
ocr = PaddleOCR(
use_angle_cls=True,
lang='en',
det_db_thresh=0.3,
det_db_box_thresh=0.3,
det_db_unclip_ratio=2.0,
rec_char_type='en',
det_limit_side_len=960,
det_limit_type='max',
use_dilation=True,
det_db_score_mode='fast',
show_log=True
)
# Run OCR
try:
result = ocr.ocr(processed_image, rec=True, cls=True)
# Save debug information to a file
with open(os.path.join(debug_dir, 'ocr_results.txt'), 'w') as f:
f.write(f"Raw OCR result:\n{result}\n\n")
if not result:
logger.warning("No results returned from OCR")
return jsonify({
'warning': 'No text detected',
'debug_dir': debug_dir
}), 200
if not result[0]:
logger.warning("Empty results list from OCR")
return jsonify({
'warning': 'Empty results list',
'debug_dir': debug_dir
}), 200
# Process the results
extracted_results = []
for idx, item in enumerate(result[0]):
try:
box = item[0]
text = item[1][0] if item[1] else ''
confidence = float(item[1][1]) if item[1] and len(item[1]) > 1 else 0.0
extracted_results.append({
'box': box,
'text': text,
'confidence': confidence,
'name': dir_name
})
except Exception as proc_err:
logger.error(f"Error processing result {idx}: {str(proc_err)}")
# Save statistics to a debug file
with open(os.path.join(debug_dir, 'statistics.txt'), 'w') as f:
f.write(f"Total results: {len(extracted_results)}\n")
if extracted_results:
avg_confidence = np.mean([r['confidence'] for r in extracted_results])
f.write(f"Average confidence: {avg_confidence}\n")
f.write("\nDetailed results:\n")
for idx, res in enumerate(extracted_results):  # avoid shadowing the OCR result
f.write(f"Result {idx+1}:\n")
f.write(f"Text: {res['text']}\n")
f.write(f"Confidence: {res['confidence']}\n")
f.write(f"Name: {dir_name}\n")
f.write(f"Box coordinates: {res['box']}\n\n")
return jsonify({
'status': 'success',
'results': extracted_results,
# 'debug_info': {
# 'total_boxes_detected': len(result[0]) if result and result[0] else 0,
# 'processed_results': len(extracted_results),
# 'debug_dir': debug_dir
# }
})
except Exception as ocr_err:
logger.error(f"OCR processing error: {str(ocr_err)}")
logger.error(traceback.format_exc())
return jsonify({
'error': 'OCR processing failed',
'details': str(ocr_err),
'debug_dir': debug_dir
}), 500
except Exception as e:
logger.error(f"Unexpected error: {str(e)}")
logger.error(traceback.format_exc())
return jsonify({
'error': 'Internal server error',
'debug_dir': debug_dir if 'debug_dir' in locals() else None
}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
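A minimal sketch of a client reading the structured /api/ocr response, assuming the server runs locally on port 5000 and some_image.png exists (both assumptions):

# ocr3_client_example.py - hypothetical client sketch for /api/ocr
import base64
import requests

with open('some_image.png', 'rb') as f:  # assumed sample file
    payload = {'image': base64.b64encode(f.read()).decode('ascii')}

resp = requests.post('http://localhost:5000/api/ocr', json=payload)
data = resp.json()

if resp.status_code == 200 and data.get('status') == 'success':
    for r in data['results']:
        print(f"{r['confidence']:.2f}  {r['text']}  box={r['box']}  run={r['name']}")
else:
    print(resp.status_code, data)  # warning or error payload, see the endpoint above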

5
paddleDownload.py Normal file

@@ -0,0 +1,5 @@
from paddleocr import PaddleOCR
# Download the PaddleOCR model
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # the pretrained model is downloaded here

29
paddleExport.py Normal file

@@ -0,0 +1,29 @@
import paddle
from paddleocr import PaddleOCR
import paddle.static as static
# Initialize the OCR model
ocr = PaddleOCR(use_angle_cls=True, lang='en')
# Input specification - the model expects an RGB image of size 640x640
input_spec = static.InputSpec(shape=[1, 3, 640, 640], dtype='float32', name='image')
# Export the model to ONNX format
def export_to_onnx():
paddle.enable_static()
model_dir = './inference/ch_ppocr_mobile_v2.0_det_infer/'  # path to the pretrained model
model_file = f'{model_dir}/model'
params_file = f'{model_dir}/params'
paddle.onnx.export(
model=model_file,
path='paddleocr_model.onnx',
input_spec=[input_spec],
model_params=params_file,
opset_version=11,
)
print("Modell wurde erfolgreich nach ONNX exportiert.")
export_to_onnx()
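Note: paddle.onnx.export expects a dygraph Layer rather than file paths, so for the downloaded static inference models the paddle2onnx tool is the more common route. A hedged sketch under that assumption; the flag names and the inference.pdmodel / inference.pdiparams filenames should be verified against the installed paddle2onnx version:

# export_with_paddle2onnx.py - hypothetical alternative sketch, not part of the commit
import subprocess

subprocess.run([
    'paddle2onnx',
    '--model_dir', './inference/ch_ppocr_mobile_v2.0_det_infer/',
    '--model_filename', 'inference.pdmodel',     # assumed filename
    '--params_filename', 'inference.pdiparams',  # assumed filename
    '--save_file', 'paddleocr_det.onnx',
    '--opset_version', '11',
], check=True)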