bug fixes

This commit is contained in:
Andreas Knuth 2025-11-26 11:13:01 -06:00
parent 6a3a9264f7
commit ac697f9590
3 changed files with 92 additions and 144 deletions

View File

@ -18,7 +18,6 @@ services:
- ./docker-data/dms/mail-state/:/var/mail-state/ - ./docker-data/dms/mail-state/:/var/mail-state/
- ./docker-data/dms/mail-logs/:/var/log/mail/ - ./docker-data/dms/mail-logs/:/var/log/mail/
- ./docker-data/dms/config/:/tmp/docker-mailserver/ - ./docker-data/dms/config/:/tmp/docker-mailserver/
# - ./docker-data/dms/config/dovecot/10-master.conf:/etc/dovecot/conf.d/10-master.conf
- /etc/localtime:/etc/localtime:ro - /etc/localtime:/etc/localtime:ro
environment: environment:
# Wichtig: Rspamd und andere Services deaktivieren für ersten Test # Wichtig: Rspamd und andere Services deaktivieren für ersten Test

View File

@ -1,106 +1,65 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# monitor-queues.py
"""
Überwacht alle Email-Queues und zeigt Statistiken
"""
import boto3 import boto3
import json
from datetime import datetime from datetime import datetime
sqs = boto3.client('sqs', region_name='eu-central-1') # WICHTIG: Region korrigiert
sqs = boto3.client('sqs', region_name='us-east-2')
DOMAINS = ['andreasknuth.de', 'bizmatch.net']
def get_queue_stats(domain):
    """Return message counts for a domain's main queue and its dead-letter queue.

    Queue names are derived from the domain: dots become dashes and
    '-queue' is appended; the dead-letter queue adds a further '-dlq'.

    Args:
        domain: Mail domain, e.g. 'example.com'.

    Returns:
        On success, a dict with the keys 'domain', 'queue' (holding
        'available', 'in_flight' and 'oldest_age') and 'dlq' (holding
        'count'). Any attribute missing from the SQS response counts as 0.
        If any SQS call raises, a dict with 'domain' and 'error' (the
        exception text) is returned instead.
    """
    main_name = domain.replace('.', '-') + '-queue'
    dead_letter_name = main_name + '-dlq'

    def _as_int(attributes, key):
        # Attributes arrive as strings; an absent key is reported as 0.
        return int(attributes.get(key, 0))

    try:
        # Main queue: resolve the URL, then fetch every attribute.
        main_url = sqs.get_queue_url(QueueName=main_name)['QueueUrl']
        main_attrs = sqs.get_queue_attributes(
            QueueUrl=main_url,
            AttributeNames=['All']
        )['Attributes']

        # Dead-letter queue: only the message count is needed.
        dead_letter_url = sqs.get_queue_url(QueueName=dead_letter_name)['QueueUrl']
        dead_letter_attrs = sqs.get_queue_attributes(
            QueueUrl=dead_letter_url,
            AttributeNames=['ApproximateNumberOfMessages']
        )['Attributes']

        main_stats = {
            'available': _as_int(main_attrs, 'ApproximateNumberOfMessages'),
            'in_flight': _as_int(main_attrs, 'ApproximateNumberOfMessagesNotVisible'),
            'oldest_age': _as_int(main_attrs, 'ApproximateAgeOfOldestMessage'),
        }
        dead_letter_stats = {
            'count': _as_int(dead_letter_attrs, 'ApproximateNumberOfMessages'),
        }
        return {'domain': domain, 'queue': main_stats, 'dlq': dead_letter_stats}
    except Exception as err:
        # Best effort: report the failure to the caller instead of raising.
        return {'domain': domain, 'error': str(err)}
def get_all_queues():
    """Discover all queues whose name ends in '-queue'.

    Dead-letter queues follow the '<name>-queue-dlq' convention used in this
    script, so they do not match the suffix and are left out.

    Returns:
        A list of (queue_name, queue_url) tuples in listing order.
    """
    pages = sqs.get_paginator('list_queues').paginate()
    # A page without any queues has no 'QueueUrls' key at all.
    all_urls = (
        queue_url
        for page in pages
        for queue_url in page.get('QueueUrls', [])
    )
    # The queue name is the last path segment of its URL.
    named = ((queue_url.split('/')[-1], queue_url) for queue_url in all_urls)
    return [
        (queue_name, queue_url)
        for queue_name, queue_url in named
        if queue_name.endswith('-queue')
    ]
def main():
    """Print a status report for every queue found by get_all_queues().

    For each queue the report shows the number of pending messages, the
    number currently in flight, and the message count of the matching
    '<queue>-dlq' dead-letter queue (or 'Not found' when that queue does
    not exist). A failure while inspecting one queue is printed and the
    loop moves on to the next queue.
    """
    print(f"\n{'='*70}")
    print(f"Email Queue Monitoring (us-east-2) - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*70}\n")

    queues = get_all_queues()

    for name, url in queues:
        dlq_name = name + '-dlq'

        try:
            # Main queue stats. Only genuine SQS queue attributes may be
            # requested by name: 'ApproximateAgeOfOldestMessage' is a
            # CloudWatch metric, and asking for it here makes the whole
            # call fail, so it is not requested (it was never displayed).
            attrs = sqs.get_queue_attributes(
                QueueUrl=url,
                AttributeNames=[
                    'ApproximateNumberOfMessages',
                    'ApproximateNumberOfMessagesNotVisible',
                ],
            )['Attributes']

            # DLQ stats: a missing DLQ is expected and reported as such;
            # any other failure falls through to the outer handler so it
            # is not silently disguised as "Not found".
            try:
                dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl']
                dlq_attrs = sqs.get_queue_attributes(
                    QueueUrl=dlq_url,
                    AttributeNames=['ApproximateNumberOfMessages'],
                )['Attributes']
                dlq_count = int(dlq_attrs.get('ApproximateNumberOfMessages', 0))
            except sqs.exceptions.QueueDoesNotExist:
                dlq_count = None  # no DLQ exists for this queue

            available = int(attrs.get('ApproximateNumberOfMessages', 0))
            flight = int(attrs.get('ApproximateNumberOfMessagesNotVisible', 0))

            # A large backlog takes precedence over the DLQ warning.
            status = ""
            if dlq_count:
                status = "⚠️ "
            if available > 50:
                status = "🔥"

            print(f"{status} Queue: {name}")
            print(f" Pending: {available:<5} (Waiting for worker)")
            print(f" Processing: {flight:<5} (Currently in worker)")
            if dlq_count is not None:
                print(f" DLQ Errors: {dlq_count:<5} (In {dlq_name})")
            else:
                print(f" DLQ: Not found")
            print("-" * 30)

        except Exception as e:
            # Top-level boundary for one queue: report and continue.
            print(f"❌ Error checking {name}: {e}")
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -1,75 +1,65 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# requeue-dlq.py <domain>
"""
Verschiebt Messages aus DLQ zurück in Main Queue
"""
import sys import sys
import boto3 import boto3
sqs = boto3.client('sqs', region_name='eu-central-1') # WICHTIG: Region auf us-east-2 gesetzt
sqs = boto3.client('sqs', region_name='us-east-2')
def requeue_dlq(domain, max_messages=10):
    """Move messages from a domain's dead-letter queue back to its main queue.

    Queue names are derived from the domain: dots become dashes and
    '-queue' is appended; the dead-letter queue adds a further '-dlq'.
    The user is asked for confirmation before anything is moved.

    Args:
        domain: Mail domain, e.g. 'example.com'.
        max_messages: Upper bound on the number of messages moved in this
            run. At most this many messages are received from the DLQ.

    Returns:
        None. Progress and results are printed to stdout.
    """
    # Normalise the names (dots to dashes).
    queue_name = domain.replace('.', '-') + '-queue'
    dlq_name = queue_name + '-dlq'

    print(f"Connecting to AWS in us-east-2 for domain: {domain}")

    try:
        # Resolve both queue URLs up front so nothing is moved if either is missing.
        q_url = sqs.get_queue_url(QueueName=queue_name)['QueueUrl']
        dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl']
    except Exception as e:
        print(f"❌ Error finding queues: {e}")
        return

    # Check how many messages are waiting in the DLQ.
    attrs = sqs.get_queue_attributes(QueueUrl=dlq_url, AttributeNames=['ApproximateNumberOfMessages'])
    count = int(attrs['Attributes']['ApproximateNumberOfMessages'])

    if count == 0:
        print(f"✅ No messages in DLQ ({dlq_name}).")
        return

    print(f"⚠️ Found {count} messages in {dlq_name}")
    print(f" Target: {queue_name}")

    if input(" Move messages now? (y/n): ").lower() != 'y':
        print("Cancelled.")
        return

    moved = 0
    while moved < max_messages:
        # Never receive more than the remaining budget: every received
        # message is moved, so an uncapped batch would overshoot
        # max_messages. SQS allows at most 10 messages per call.
        batch_size = min(10, max_messages - moved)
        resp = sqs.receive_message(
            QueueUrl=dlq_url,
            MaxNumberOfMessages=batch_size,
            WaitTimeSeconds=1
        )

        msgs = resp.get('Messages', [])
        if not msgs:
            break

        for msg in msgs:
            # 1. Send to the main queue first ...
            sqs.send_message(QueueUrl=q_url, MessageBody=msg['Body'])
            # 2. ... and only then delete from the DLQ, so a failed send
            #    leaves the message in the DLQ.
            sqs.delete_message(QueueUrl=dlq_url, ReceiptHandle=msg['ReceiptHandle'])
            moved += 1
            print(f" ✓ Moved message {msg['MessageId']}")

    print(f"✅ Successfully moved {moved} messages.")
# Command-line entry point: expects the mail domain as the only argument.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 requeue-dlq.py <domain>")
        sys.exit(1)
    requeue_dlq(cli_args[0])