bug fixes

This commit is contained in:
Andreas Knuth 2025-11-26 11:13:01 -06:00
parent 6a3a9264f7
commit ac697f9590
3 changed files with 92 additions and 144 deletions

View File

@ -18,7 +18,6 @@ services:
- ./docker-data/dms/mail-state/:/var/mail-state/
- ./docker-data/dms/mail-logs/:/var/log/mail/
- ./docker-data/dms/config/:/tmp/docker-mailserver/
# - ./docker-data/dms/config/dovecot/10-master.conf:/etc/dovecot/conf.d/10-master.conf
- /etc/localtime:/etc/localtime:ro
environment:
# Wichtig: Rspamd und andere Services deaktivieren für ersten Test

View File

@ -1,106 +1,65 @@
#!/usr/bin/env python3
# monitor-queues.py
"""
Monitor all email queues and print their statistics.
"""
import boto3
import json
from datetime import datetime
sqs = boto3.client('sqs', region_name='eu-central-1')
DOMAINS = ['andreasknuth.de', 'bizmatch.net']
def get_queue_stats(domain):
    """Collect message counts for a domain's main queue and its DLQ.

    Queue names are derived from the domain: dots become dashes and
    ``-queue`` is appended; the dead-letter queue adds a further ``-dlq``.

    Args:
        domain: Domain name such as ``example.com``.

    Returns:
        On success, a dict with ``domain``, ``queue`` (``available``,
        ``in_flight``, ``oldest_age``) and ``dlq`` (``count``).
        On any failure, a dict with ``domain`` and ``error`` (the exception
        text) instead.
    """
    main_name = domain.replace('.', '-') + '-queue'
    dead_letter_name = main_name + '-dlq'

    try:
        # Main queue: resolve its URL, then fetch every attribute.
        main_url = sqs.get_queue_url(QueueName=main_name)['QueueUrl']
        main_attrs = sqs.get_queue_attributes(
            QueueUrl=main_url,
            AttributeNames=['All'],
        )['Attributes']

        # Dead-letter queue: only the message count is needed.
        dead_letter_url = sqs.get_queue_url(QueueName=dead_letter_name)['QueueUrl']
        dead_letter_attrs = sqs.get_queue_attributes(
            QueueUrl=dead_letter_url,
            AttributeNames=['ApproximateNumberOfMessages'],
        )['Attributes']

        # NOTE(review): 'ApproximateAgeOfOldestMessage' looks like a CloudWatch
        # metric rather than an SQS queue attribute, so 'oldest_age' probably
        # always falls back to 0 - confirm.
        main_stats = {
            'available': int(main_attrs.get('ApproximateNumberOfMessages', 0)),
            'in_flight': int(main_attrs.get('ApproximateNumberOfMessagesNotVisible', 0)),
            'oldest_age': int(main_attrs.get('ApproximateAgeOfOldestMessage', 0)),
        }
        dead_letter_stats = {
            'count': int(dead_letter_attrs.get('ApproximateNumberOfMessages', 0)),
        }
        return {
            'domain': domain,
            'queue': main_stats,
            'dlq': dead_letter_stats,
        }
    except Exception as exc:
        # Best effort: report the problem to the caller instead of raising.
        return {
            'domain': domain,
            'error': str(exc),
        }
# IMPORTANT: region corrected
sqs = boto3.client('sqs', region_name='us-east-2')
def get_all_queues():
    """Discover every queue whose name ends in ``-queue``.

    Dead-letter queues are excluded by that suffix test, because their names
    end in ``-dlq``.

    Returns:
        A list of ``(queue_name, queue_url)`` tuples, in listing order.
    """
    discovered = []
    pages = sqs.get_paginator('list_queues').paginate()
    for page in pages:
        for queue_url in page.get('QueueUrls', []):
            # The queue name is the last path segment of its URL.
            queue_name = queue_url.rsplit('/', 1)[-1]
            if not queue_name.endswith('-queue'):
                continue
            discovered.append((queue_name, queue_url))
    return discovered
def main():
    """Print a status report for every auto-discovered SQS queue.

    For each queue returned by ``get_all_queues()`` this prints the number of
    pending and in-flight messages and, when the companion ``<name>-dlq``
    queue can be looked up, its message count. A failure while checking one
    queue is printed and does not stop the remaining queues from being
    checked.
    """
    print(f"\n{'='*70}")
    print(f"Email Queue Monitoring (us-east-2) - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*70}\n")

    queues = get_all_queues()

    for name, url in queues:
        dlq_name = name + '-dlq'

        try:
            # Main queue stats. Only the two counters that are displayed are
            # requested: 'ApproximateAgeOfOldestMessage' is a CloudWatch
            # metric, not an SQS queue attribute, and asking for it by name
            # makes the whole call fail.
            attrs = sqs.get_queue_attributes(
                QueueUrl=url,
                AttributeNames=[
                    'ApproximateNumberOfMessages',
                    'ApproximateNumberOfMessagesNotVisible',
                ],
            )['Attributes']

            # DLQ stats are best effort: any lookup failure is shown as
            # "Not found". Catch Exception rather than using a bare except so
            # Ctrl-C / SystemExit still stop the script.
            try:
                dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl']
                dlq_attrs = sqs.get_queue_attributes(
                    QueueUrl=dlq_url,
                    AttributeNames=['ApproximateNumberOfMessages'],
                )['Attributes']
                dlq_count = int(dlq_attrs.get('ApproximateNumberOfMessages', 0))
            except Exception:
                dlq_count = -1  # sentinel: no DLQ found

            available = int(attrs.get('ApproximateNumberOfMessages', 0))
            flight = int(attrs.get('ApproximateNumberOfMessagesNotVisible', 0))

            # A large backlog takes precedence over the DLQ warning marker.
            status = ""
            if dlq_count > 0:
                status = "⚠️ "
            if available > 50:
                status = "🔥"

            print(f"{status} Queue: {name}")
            print(f" Pending: {available:<5} (Waiting for worker)")
            print(f" Processing: {flight:<5} (Currently in worker)")
            if dlq_count >= 0:
                print(f" DLQ Errors: {dlq_count:<5} (In {dlq_name})")
            else:
                print(f" DLQ: Not found")
            print("-" * 30)
        except Exception as e:
            print(f"❌ Error checking {name}: {e}")
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,75 +1,65 @@
#!/usr/bin/env python3
# requeue-dlq.py <domain>
"""
Move messages from the DLQ back into the main queue.
"""
import sys
import boto3
sqs = boto3.client('sqs', region_name='eu-central-1')
# IMPORTANT: region set to us-east-2
sqs = boto3.client('sqs', region_name='us-east-2')
def requeue_dlq(domain, max_messages=10):
    """Move messages from a domain's dead-letter queue back to its main queue.

    Queue names are derived from the domain: dots become dashes and
    ``-queue`` is appended; the DLQ adds a further ``-dlq``. The user is asked
    for confirmation before anything is moved.

    Args:
        domain: Domain name such as ``example.com``.
        max_messages: Upper bound on the number of messages moved in this
            run. The bound is honoured exactly, also when it is not a
            multiple of the batch size.

    Returns:
        None. Progress and results are printed to stdout.
    """
    # Normalise names (dots to dashes).
    queue_name = domain.replace('.', '-') + '-queue'
    dlq_name = queue_name + '-dlq'

    print(f"Connecting to AWS in us-east-2 for domain: {domain}")

    try:
        # Resolve both queue URLs up front so nothing is moved if one is missing.
        q_url = sqs.get_queue_url(QueueName=queue_name)['QueueUrl']
        dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl']
    except Exception as e:
        print(f"❌ Error finding queues: {e}")
        return

    # Check how many messages are waiting in the DLQ.
    attrs = sqs.get_queue_attributes(
        QueueUrl=dlq_url,
        AttributeNames=['ApproximateNumberOfMessages'],
    )
    count = int(attrs['Attributes']['ApproximateNumberOfMessages'])

    if count == 0:
        print(f"✅ No messages in DLQ ({dlq_name}).")
        return

    print(f"⚠️ Found {count} messages in {dlq_name}")
    print(f" Target: {queue_name}")

    if input(" Move messages now? (y/n): ").lower() != 'y':
        print("Cancelled.")
        return

    moved = 0
    while moved < max_messages:
        # SQS returns at most 10 messages per call. Never ask for more than
        # the remaining budget, otherwise the loop could move more than
        # max_messages.
        batch_size = min(10, max_messages - moved)
        resp = sqs.receive_message(
            QueueUrl=dlq_url,
            MaxNumberOfMessages=batch_size,
            WaitTimeSeconds=1,
        )

        msgs = resp.get('Messages', [])
        if not msgs:
            break

        for msg in msgs:
            # Send first, delete second: if the delete fails the message is
            # duplicated rather than lost.
            # NOTE(review): only the body is forwarded; message attributes
            # are not requested or copied - confirm none are needed.
            sqs.send_message(QueueUrl=q_url, MessageBody=msg['Body'])
            sqs.delete_message(QueueUrl=dlq_url, ReceiptHandle=msg['ReceiptHandle'])
            moved += 1
            print(f" ✓ Moved message {msg['MessageId']}")

    print(f"✅ Successfully moved {moved} messages.")
# Script entry point: expects the domain as the only command-line argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: python3 requeue-dlq.py <domain>")
        sys.exit(1)

    # Invoke exactly once; the interleaved old/new lines called it twice.
    requeue_dlq(sys.argv[1])