From ac697f95902fb75ef3a7a1a4daa5e1a94dddc24b Mon Sep 17 00:00:00 2001 From: Andreas Knuth Date: Wed, 26 Nov 2025 11:13:01 -0600 Subject: [PATCH] bug fixes --- DMS/docker-compose.yml | 1 - monitor-queues.py | 139 +++++++++++++++-------------------------- requeue-dlq.py | 96 +++++++++++++--------------- 3 files changed, 92 insertions(+), 144 deletions(-) diff --git a/DMS/docker-compose.yml b/DMS/docker-compose.yml index 77bbe91..464eee6 100644 --- a/DMS/docker-compose.yml +++ b/DMS/docker-compose.yml @@ -18,7 +18,6 @@ services: - ./docker-data/dms/mail-state/:/var/mail-state/ - ./docker-data/dms/mail-logs/:/var/log/mail/ - ./docker-data/dms/config/:/tmp/docker-mailserver/ - # - ./docker-data/dms/config/dovecot/10-master.conf:/etc/dovecot/conf.d/10-master.conf - /etc/localtime:/etc/localtime:ro environment: # Wichtig: Rspamd und andere Services deaktivieren für ersten Test diff --git a/monitor-queues.py b/monitor-queues.py index 5224f25..f5cd3a7 100644 --- a/monitor-queues.py +++ b/monitor-queues.py @@ -1,106 +1,65 @@ #!/usr/bin/env python3 -# monitor-queues.py -""" -Überwacht alle Email-Queues und zeigt Statistiken -""" - import boto3 -import json from datetime import datetime -sqs = boto3.client('sqs', region_name='eu-central-1') - -DOMAINS = ['andreasknuth.de', 'bizmatch.net'] - -def get_queue_stats(domain): - """Zeigt Queue-Statistiken für eine Domain""" - queue_name = domain.replace('.', '-') + '-queue' - dlq_name = queue_name + '-dlq' - - try: - # Main Queue URL - queue_url = sqs.get_queue_url(QueueName=queue_name)['QueueUrl'] - - # Queue Attributes - attrs = sqs.get_queue_attributes( - QueueUrl=queue_url, - AttributeNames=['All'] - )['Attributes'] - - # DLQ URL - dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl'] - - # DLQ Attributes - dlq_attrs = sqs.get_queue_attributes( - QueueUrl=dlq_url, - AttributeNames=['ApproximateNumberOfMessages'] - )['Attributes'] - - return { - 'domain': domain, - 'queue': { - 'available': int(attrs.get('ApproximateNumberOfMessages', 0)), - 'in_flight': int(attrs.get('ApproximateNumberOfMessagesNotVisible', 0)), - 'oldest_age': int(attrs.get('ApproximateAgeOfOldestMessage', 0)) - }, - 'dlq': { - 'count': int(dlq_attrs.get('ApproximateNumberOfMessages', 0)) - } - } - except Exception as e: - return { - 'domain': domain, - 'error': str(e) - } +# WICHTIG: Region korrigiert +sqs = boto3.client('sqs', region_name='us-east-2') +def get_all_queues(): + """Findet automatisch alle Queues, die auf '-queue' enden (keine DLQs)""" + queues = [] + paginator = sqs.get_paginator('list_queues') + for page in paginator.paginate(): + for url in page.get('QueueUrls', []): + name = url.split('/')[-1] + if name.endswith('-queue'): + queues.append((name, url)) + return queues def main(): print(f"\n{'='*70}") - print(f"Email Queue Monitoring - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print(f"Email Queue Monitoring (us-east-2) - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print(f"{'='*70}\n") - total_available = 0 - total_in_flight = 0 - total_dlq = 0 + queues = get_all_queues() - for domain in DOMAINS: - stats = get_queue_stats(domain) + for name, url in queues: + dlq_name = name + '-dlq' + domain = name.replace('-queue', '').replace('-', '.') # Grobe Rückumwandlung - if 'error' in stats: - print(f"❌ {domain}: {stats['error']}") - continue - - queue = stats['queue'] - dlq = stats['dlq'] - - total_available += queue['available'] - total_in_flight += queue['in_flight'] - total_dlq += dlq['count'] - - status = "✅" if dlq['count'] == 0 else "⚠️" - - print(f"{status} {domain}") - print(f" Available: {queue['available']:>5} messages") - print(f" In Flight: {queue['in_flight']:>5} messages") - print(f" Oldest Age: {queue['oldest_age']:>5}s") - print(f" DLQ: {dlq['count']:>5} messages") - - if dlq['count'] > 0: - print(f" ⚠️ WARNING: {dlq['count']} failed message(s) in DLQ!") - - print() - - print(f"{'='*70}") - print(f"TOTALS:") - print(f" Available: {total_available} messages") - print(f" In Flight: {total_in_flight} messages") - print(f" Failed: {total_dlq} messages") - print(f"{'='*70}\n") - - if total_dlq > 0: - print(f"⚠️ Action required: {total_dlq} message(s) in Dead Letter Queues!") - print(f" Run: python check-dlq.py to investigate\n") + try: + # Main Queue Stats + attrs = sqs.get_queue_attributes( + QueueUrl=url, + AttributeNames=['ApproximateNumberOfMessages', 'ApproximateNumberOfMessagesNotVisible', 'ApproximateAgeOfOldestMessage'] + )['Attributes'] + + # DLQ Stats (Versuch URL zu finden) + try: + dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl'] + dlq_attrs = sqs.get_queue_attributes(QueueUrl=dlq_url, AttributeNames=['ApproximateNumberOfMessages'])['Attributes'] + dlq_count = int(dlq_attrs.get('ApproximateNumberOfMessages', 0)) + except: + dlq_count = -1 # Keine DLQ gefunden + available = int(attrs.get('ApproximateNumberOfMessages', 0)) + flight = int(attrs.get('ApproximateNumberOfMessagesNotVisible', 0)) + + status = "✅" + if dlq_count > 0: status = "⚠️ " + if available > 50: status = "🔥" + + print(f"{status} Queue: {name}") + print(f" Pending: {available:<5} (Waiting for worker)") + print(f" Processing: {flight:<5} (Currently in worker)") + if dlq_count >= 0: + print(f" DLQ Errors: {dlq_count:<5} (In {dlq_name})") + else: + print(f" DLQ: Not found") + print("-" * 30) + + except Exception as e: + print(f"❌ Error checking {name}: {e}") if __name__ == '__main__': main() \ No newline at end of file diff --git a/requeue-dlq.py b/requeue-dlq.py index 487504c..dfe2521 100644 --- a/requeue-dlq.py +++ b/requeue-dlq.py @@ -1,75 +1,65 @@ #!/usr/bin/env python3 -# requeue-dlq.py -""" -Verschiebt Messages aus DLQ zurück in Main Queue -""" - import sys import boto3 -sqs = boto3.client('sqs', region_name='eu-central-1') +# WICHTIG: Region auf us-east-2 gesetzt +sqs = boto3.client('sqs', region_name='us-east-2') def requeue_dlq(domain, max_messages=10): - """Verschiebt Messages aus DLQ zurück in Main Queue""" - + # Namen normalisieren (Punkte zu Bindestrichen) queue_name = domain.replace('.', '-') + '-queue' dlq_name = queue_name + '-dlq' + print(f"Connecting to AWS in us-east-2 for domain: {domain}") + try: - queue_url = sqs.get_queue_url(QueueName=queue_name)['QueueUrl'] + # URLs holen + q_url = sqs.get_queue_url(QueueName=queue_name)['QueueUrl'] dlq_url = sqs.get_queue_url(QueueName=dlq_name)['QueueUrl'] except Exception as e: - print(f"❌ Error: {e}") + print(f"❌ Error finding queues: {e}") return + + # Status prüfen + attrs = sqs.get_queue_attributes(QueueUrl=dlq_url, AttributeNames=['ApproximateNumberOfMessages']) + count = int(attrs['Attributes']['ApproximateNumberOfMessages']) - print(f"Re-queuing up to {max_messages} messages from DLQ to main queue...") - print(f"Domain: {domain}") - print(f"From: {dlq_name}") - print(f"To: {queue_name}\n") + if count == 0: + print(f"✅ No messages in DLQ ({dlq_name}).") + return + + print(f"⚠️ Found {count} messages in {dlq_name}") + print(f" Target: {queue_name}") - confirm = input("Continue? (yes/no): ") - if confirm.lower() != 'yes': + if input(" Move messages now? (y/n): ").lower() != 'y': print("Cancelled.") return - - # Messages aus DLQ holen - response = sqs.receive_message( - QueueUrl=dlq_url, - MaxNumberOfMessages=max_messages, - WaitTimeSeconds=0 - ) - - messages = response.get('Messages', []) - - if not messages: - print("No messages in DLQ.") - return - - print(f"\nRe-queuing {len(messages)} message(s)...\n") - - for msg in messages: - # In Haupt-Queue schreiben - sqs.send_message( - QueueUrl=queue_url, - MessageBody=msg['Body'] - ) - - # Aus DLQ löschen - sqs.delete_message( - QueueUrl=dlq_url, - ReceiptHandle=msg['ReceiptHandle'] - ) - - print(f" ✓ Re-queued message {msg['MessageId']}") - - print(f"\n✅ Done! {len(messages)} message(s) re-queued to {queue_name}") + moved = 0 + while moved < max_messages: + # Messages holen + resp = sqs.receive_message( + QueueUrl=dlq_url, + MaxNumberOfMessages=10, # Max allowed by AWS per call + WaitTimeSeconds=1 + ) + + msgs = resp.get('Messages', []) + if not msgs: + break + + for msg in msgs: + # 1. In Main Queue senden + sqs.send_message(QueueUrl=q_url, MessageBody=msg['Body']) + # 2. Aus DLQ löschen + sqs.delete_message(QueueUrl=dlq_url, ReceiptHandle=msg['ReceiptHandle']) + moved += 1 + print(f" ✓ Moved message {msg['MessageId']}") + + print(f"✅ Successfully moved {moved} messages.") if __name__ == '__main__': if len(sys.argv) < 2: - print("Usage: python requeue-dlq.py ") - print("Example: python requeue-dlq.py andreasknuth.de") + print("Usage: python3 requeue-dlq.py ") sys.exit(1) - - domain = sys.argv[1] - requeue_dlq(domain) \ No newline at end of file + requeue_dlq(sys.argv[1]) \ No newline at end of file